busybox/editors/awk.c
<<
>>
Prefs
   1/* vi: set sw=4 ts=4: */
   2/*
   3 * awk implementation for busybox
   4 *
   5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
   6 *
   7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
   8 */
   9//config:config AWK
  10//config:       bool "awk (23 kb)"
  11//config:       default y
  12//config:       help
  13//config:       Awk is used as a pattern scanning and processing language.
  14//config:
  15//config:config FEATURE_AWK_LIBM
  16//config:       bool "Enable math functions (requires libm)"
  17//config:       default y
  18//config:       depends on AWK
  19//config:       help
  20//config:       Enable math functions of the Awk programming language.
  21//config:       NOTE: This requires libm to be present for linking.
  22//config:
  23//config:config FEATURE_AWK_GNU_EXTENSIONS
  24//config:       bool "Enable a few GNU extensions"
  25//config:       default y
  26//config:       depends on AWK
  27//config:       help
  28//config:       Enable a few features from gawk:
  29//config:       * command line option -e AWK_PROGRAM
  30//config:       * simultaneous use of -f and -e on the command line.
  31//config:       This enables the use of awk library files.
  32//config:       Example: awk -f mylib.awk -e '{print myfunction($1);}' ...
  33
  34//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
  35
  36//kbuild:lib-$(CONFIG_AWK) += awk.o
  37
  38//usage:#define awk_trivial_usage
  39//usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
  40//usage:#define awk_full_usage "\n\n"
  41//usage:       "        -v VAR=VAL      Set variable"
  42//usage:     "\n        -F SEP          Use SEP as field separator"
  43//usage:     "\n        -f FILE         Read program from FILE"
  44//usage:        IF_FEATURE_AWK_GNU_EXTENSIONS(
  45//usage:     "\n        -e AWK_PROGRAM"
  46//usage:        )
  47
  48#include "libbb.h"
  49#include "xregex.h"
  50#include <math.h>
  51
  52/* This is a NOEXEC applet. Be very careful! */
  53
  54
  55/* If you comment out one of these below, it will be #defined later
  56 * to perform debug printfs to stderr: */
  57#define debug_printf_walker(...)  do {} while (0)
  58#define debug_printf_eval(...)  do {} while (0)
  59#define debug_printf_parse(...)  do {} while (0)
  60
  61#ifndef debug_printf_walker
  62# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
  63#endif
  64#ifndef debug_printf_eval
  65# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
  66#endif
  67#ifndef debug_printf_parse
  68# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
  69#endif
  70
  71
  72/* "+": stop on first non-option:
  73 * $ awk 'BEGIN { for(i=1; i<ARGC; ++i) { print i ": " ARGV[i] }}' -argz
  74 * 1: -argz
  75 */
  76#define OPTSTR_AWK "+" \
  77        "F:v:*f:*" \
  78        IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
  79        "W:"
  80enum {
  81        OPTBIT_F,       /* define field separator */
  82        OPTBIT_v,       /* define variable */
  83        OPTBIT_f,       /* pull in awk program from file */
  84        IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
  85        OPTBIT_W,       /* -W ignored */
  86        OPT_F = 1 << OPTBIT_F,
  87        OPT_v = 1 << OPTBIT_v,
  88        OPT_f = 1 << OPTBIT_f,
  89        OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
  90        OPT_W = 1 << OPTBIT_W
  91};
  92
  93#define MAXVARFMT       240
  94#define MINNVBLOCK      64
  95
  96/* variable flags */
  97#define VF_NUMBER       0x0001  /* 1 = primary type is number */
  98#define VF_ARRAY        0x0002  /* 1 = it's an array */
  99
 100#define VF_CACHED       0x0100  /* 1 = num/str value has cached str/num eq */
 101#define VF_USER         0x0200  /* 1 = user input (may be numeric string) */
 102#define VF_SPECIAL      0x0400  /* 1 = requires extra handling when changed */
 103#define VF_WALK         0x0800  /* 1 = variable has alloc'd x.walker list */
 104#define VF_FSTR         0x1000  /* 1 = var::string points to fstring buffer */
 105#define VF_CHILD        0x2000  /* 1 = function arg; x.parent points to source */
 106#define VF_DIRTY        0x4000  /* 1 = variable was set explicitly */
 107
 108/* these flags are static, don't change them when value is changed */
 109#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
 110
  111typedef struct walker_list { /* for..in state; reached through var.x.walker when VF_WALK is set */
  112        char *end;      /* NOTE(review): presumably end of the data held in wbuf - confirm in the for..in code */
  113        char *cur;      /* NOTE(review): presumably current position inside wbuf - confirm */
  114        struct walker_list *prev; /* link to another walker_list; NOTE(review): presumably the previously active walk */
  115        char wbuf[1];   /* trailing buffer; NOTE(review): presumably over-allocated like hash_item.name - confirm */
  116} walker_list;
 117
 118/* Variable */
  119typedef struct var_s {
  120        unsigned type;            /* VF_* flags */
  121        double number;            /* numeric value; VF_NUMBER marks it as the primary type */
  122        char *string;             /* string value; clrvar() frees it unless VF_FSTR is set */
  123        union {
  124                int aidx;               /* func arg idx (for compilation stage) */
  125                struct xhash_s *array;  /* array ptr; created by iamarray() when VF_ARRAY gets set */
  126                struct var_s *parent;   /* for func args (VF_CHILD), ptr to actual parameter */
  127                walker_list *walker;    /* list of array elements (for..in); see VF_WALK */
  128        } x;
  129} var;
 130
 131/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
  132typedef struct chain_s {
  133        struct node_s *first;     /* first node of the chain */
  134        struct node_s *last;      /* last node of the chain */
  135        const char *programname;  /* NOTE(review): presumably the program source name for diagnostics (cf. g_progname) - confirm */
  136} chain;
 137
 138/* Function */
  139typedef struct func_s {   /* stored in hash_item.data.f (functions hash) */
  140        unsigned nargs;         /* NOTE(review): presumably the number of declared parameters - confirm in the parser */
  141        struct chain_s body;    /* node chain holding the function body */
  142} func;
 143
 144/* I/O stream */
  145typedef struct rstream_s { /* stored in hash_item.data.rs (redirect streams hash) */
  146        FILE *F;                /* stdio stream */
  147        char *buffer;           /* NOTE(review): presumably the record read buffer - confirm in the reader code */
  148        int adv;                /* NOTE(review): meaning not visible in this part of the file */
  149        int size;               /* NOTE(review): presumably the allocated size of buffer - confirm */
  150        int pos;                /* NOTE(review): presumably a position within buffer - confirm */
  151        smallint is_pipe;       /* NOTE(review): presumably nonzero when F is a pipe rather than a file - confirm */
  152} rstream;
 153
  154typedef struct hash_item_s {
  155        union {                         /* must stay the first member: hash_find() reuses the pointer to data as the item pointer */
  156                struct var_s v;         /* variable/array hash */
  157                struct rstream_s rs;    /* redirect streams hash */
  158                struct func_s f;        /* functions hash */
  159        } data;
  160        struct hash_item_s *next;       /* next item in the same bucket chain */
  161        char name[1];                   /* really it's longer: hash_find() allocates strlen(name)+1 extra bytes */
  162} hash_item;
 163
  164typedef struct xhash_s {
  165        unsigned nel;           /* number of elements */
  166        unsigned csize;         /* current number of buckets in items[] */
  167        unsigned nprime;        /* index into PRIMES[] of the next size to grow to */
  168        unsigned glen;          /* total length of item names, each counted with its NUL */
  169        struct hash_item_s **items; /* array of csize bucket chain heads */
  170} xhash;
 171
 172/* Tree node */
  173typedef struct node_s {
  174        uint32_t info;          /* NOTE(review): presumably packed like tokeninfo[] entries (OC_* class, OF_* flags, priority) - confirm */
  175        unsigned lineno;        /* NOTE(review): presumably the source line this node was parsed from - confirm */
  176        union {                 /* "l" slot: one of the following, depending on the node */
  177                struct node_s *n;
  178                var *v;
  179                int aidx;
  180                char *new_progname;
  181                regex_t *re;
  182        } l;
  183        union {                 /* "r" slot: one of the following, depending on the node */
  184                struct node_s *n;
  185                regex_t *ire;
  186                func *f;
  187        } r;
  188        union {                 /* "a" slot: a further node link; NOTE(review): exact role not visible here */
  189                struct node_s *n;
  190        } a;
  191} node;
 192
 193/* Block of temporary variables */
  194typedef struct nvblock_s {
  195        int size;               /* NOTE(review): presumably the number of var slots in nv[] (cf. MINNVBLOCK) - confirm */
  196        var *pos;               /* NOTE(review): presumably the next free slot inside nv[] - confirm */
  197        struct nvblock_s *prev; /* previous block in the list */
  198        struct nvblock_s *next; /* next block in the list */
  199        var nv[];               /* flexible array of temporary variables */
  200} nvblock;
 201
  202typedef struct tsplitter_s { /* used for fsplitter/rsplitter and exec_builtin__tspl in struct globals2 */
  203        node n;                 /* embedded tree node */
  204        regex_t re[2];          /* two compiled regexes; NOTE(review): presumably a case-sensitive and a case-insensitive variant (cf. node.l.re / node.r.ire) - confirm */
  205} tsplitter;
 206
 207/* simple token classes */
 208/* Order and hex values are very important!!!  See next_token() */
 209#define TC_SEQSTART     (1 << 0)                /* ( */
 210#define TC_SEQTERM      (1 << 1)                /* ) */
 211#define TC_REGEXP       (1 << 2)                /* /.../ */
 212#define TC_OUTRDR       (1 << 3)                /* | > >> */
 213#define TC_UOPPOST      (1 << 4)                /* unary postfix operator */
 214#define TC_UOPPRE1      (1 << 5)                /* unary prefix operator */
 215#define TC_BINOPX       (1 << 6)                /* two-opnd operator */
 216#define TC_IN           (1 << 7)
 217#define TC_COMMA        (1 << 8)
 218#define TC_PIPE         (1 << 9)                /* input redirection pipe */
 219#define TC_UOPPRE2      (1 << 10)               /* unary prefix operator */
 220#define TC_ARRTERM      (1 << 11)               /* ] */
 221#define TC_GRPSTART     (1 << 12)               /* { */
 222#define TC_GRPTERM      (1 << 13)               /* } */
 223#define TC_SEMICOL      (1 << 14)
 224#define TC_NEWLINE      (1 << 15)
 225#define TC_STATX        (1 << 16)               /* ctl statement (for, next...) */
 226#define TC_WHILE        (1 << 17)
 227#define TC_ELSE         (1 << 18)
 228#define TC_BUILTIN      (1 << 19)
 229/* This costs ~50 bytes of code.
 230 * A separate class to support deprecated "length" form. If we don't need that
 231 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
 232 * can be merged with TC_BUILTIN:
 233 */
 234#define TC_LENGTH       (1 << 20)
 235#define TC_GETLINE      (1 << 21)
 236#define TC_FUNCDECL     (1 << 22)               /* 'function' 'func' */
 237#define TC_BEGIN        (1 << 23)
 238#define TC_END          (1 << 24)
 239#define TC_EOF          (1 << 25)
 240#define TC_VARIABLE     (1 << 26)
 241#define TC_ARRAY        (1 << 27)
 242#define TC_FUNCTION     (1 << 28)
 243#define TC_STRING       (1 << 29)
 244#define TC_NUMBER       (1 << 30)
 245
 246#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)
 247
 248/* combined token classes */
 249#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
 250//#define       TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
 251#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 252                   | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 253                   | TC_SEQSTART | TC_STRING | TC_NUMBER)
 254
 255#define TC_STATEMNT (TC_STATX | TC_WHILE)
 256#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)
 257
 258/* word tokens, cannot mean something else if not expected */
 259#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE \
 260                   | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 261                   | TC_FUNCDECL | TC_BEGIN | TC_END)
 262
 263/* discard newlines after these */
 264#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
 265                   | TC_BINOP | TC_OPTERM)
 266
 267/* what can expression begin with */
 268#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
 269/* what can group begin with */
 270#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
 271
 272/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
 273/* operator is inserted between them */
 274#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
 275                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
 276#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
 277
 278#define OF_RES1    0x010000
 279#define OF_RES2    0x020000
 280#define OF_STR1    0x040000
 281#define OF_STR2    0x080000
 282#define OF_NUM1    0x100000
 283#define OF_CHECKED 0x200000
 284
 285/* combined operator flags */
 286#define xx      0
 287#define xV      OF_RES2
 288#define xS      (OF_RES2 | OF_STR2)
 289#define Vx      OF_RES1
 290#define VV      (OF_RES1 | OF_RES2)
 291#define Nx      (OF_RES1 | OF_NUM1)
 292#define NV      (OF_RES1 | OF_NUM1 | OF_RES2)
 293#define Sx      (OF_RES1 | OF_STR1)
 294#define SV      (OF_RES1 | OF_STR1 | OF_RES2)
 295#define SS      (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
 296
 297#define OPCLSMASK 0xFF00
 298#define OPNMASK   0x007F
 299
  300/* Operator priority is stored in the highest byte (even: r->l, odd: l->r grouping).
  301 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
  302 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
  303 */
 304#undef P
 305#undef PRIMASK
 306#undef PRIMASK2
 307#define P(x)      (x << 24)
 308#define PRIMASK   0x7F000000
 309#define PRIMASK2  0x7E000000
 310
 311/* Operation classes */
 312
 313#define SHIFT_TIL_THIS  0x0600
 314#define RECUR_FROM_THIS 0x1000
 315
 316enum {
 317        OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
 318        OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
 319
 320        OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
 321        OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
 322        OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
 323
 324        OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
 325        OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
 326        OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
 327        OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
 328        OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
 329        OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
 330        OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
 331        OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
 332        OC_DONE = 0x2800,
 333
 334        ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
 335        ST_WHILE = 0x3300
 336};
 337
 338/* simple builtins */
 339enum {
 340        F_in,   F_rn,   F_co,   F_ex,   F_lg,   F_si,   F_sq,   F_sr,
 341        F_ti,   F_le,   F_sy,   F_ff,   F_cl
 342};
 343
 344/* builtins */
 345enum {
 346        B_a2,   B_ix,   B_ma,   B_sp,   B_ss,   B_ti,   B_mt,   B_lo,   B_up,
 347        B_ge,   B_gs,   B_su,
 348        B_an,   B_co,   B_ls,   B_or,   B_rs,   B_xo,
 349};
 350
 351/* tokens and their corresponding info values */
 352
 353#define NTC     "\377"  /* switch to next token class (tc<<1) */
 354#define NTCC    '\377'
 355
 356static const char tokenlist[] ALIGN1 =
 357        "\1("         NTC                                   /* TC_SEQSTART */
 358        "\1)"         NTC                                   /* TC_SEQTERM */
 359        "\1/"         NTC                                   /* TC_REGEXP */
 360        "\2>>"        "\1>"         "\1|"       NTC         /* TC_OUTRDR */
 361        "\2++"        "\2--"        NTC                     /* TC_UOPPOST */
 362        "\2++"        "\2--"        "\1$"       NTC         /* TC_UOPPRE1 */
 363        "\2=="        "\1="         "\2+="      "\2-="      /* TC_BINOPX */
 364        "\2*="        "\2/="        "\2%="      "\2^="
 365        "\1+"         "\1-"         "\3**="     "\2**"
 366        "\1/"         "\1%"         "\1^"       "\1*"
 367        "\2!="        "\2>="        "\2<="      "\1>"
 368        "\1<"         "\2!~"        "\1~"       "\2&&"
 369        "\2||"        "\1?"         "\1:"       NTC
 370        "\2in"        NTC                                   /* TC_IN */
 371        "\1,"         NTC                                   /* TC_COMMA */
 372        "\1|"         NTC                                   /* TC_PIPE */
 373        "\1+"         "\1-"         "\1!"       NTC         /* TC_UOPPRE2 */
 374        "\1]"         NTC                                   /* TC_ARRTERM */
 375        "\1{"         NTC                                   /* TC_GRPSTART */
 376        "\1}"         NTC                                   /* TC_GRPTERM */
 377        "\1;"         NTC                                   /* TC_SEMICOL */
 378        "\1\n"        NTC                                   /* TC_NEWLINE */
 379        "\2if"        "\2do"        "\3for"     "\5break"   /* TC_STATX */
 380        "\10continue" "\6delete"    "\5print"
 381        "\6printf"    "\4next"      "\10nextfile"
 382        "\6return"    "\4exit"      NTC
 383        "\5while"     NTC                                   /* TC_WHILE */
 384        "\4else"      NTC                                   /* TC_ELSE */
 385        "\3and"       "\5compl"     "\6lshift"  "\2or"      /* TC_BUILTIN */
 386        "\6rshift"    "\3xor"
 387        "\5close"     "\6system"    "\6fflush"  "\5atan2"
 388        "\3cos"       "\3exp"       "\3int"     "\3log"
 389        "\4rand"      "\3sin"       "\4sqrt"    "\5srand"
 390        "\6gensub"    "\4gsub"      "\5index"   /* "\6length" was here */
 391        "\5match"     "\5split"     "\7sprintf" "\3sub"
 392        "\6substr"    "\7systime"   "\10strftime" "\6mktime"
 393        "\7tolower"   "\7toupper"   NTC
 394        "\6length"    NTC                                   /* TC_LENGTH */
 395        "\7getline"   NTC                                   /* TC_GETLINE */
 396        "\4func"      "\10function" NTC                     /* TC_FUNCDECL */
 397        "\5BEGIN"     NTC                                   /* TC_BEGIN */
 398        "\3END"                                             /* TC_END */
 399        /* compiler adds trailing "\0" */
 400        ;
 401
 402#define OC_B  OC_BUILTIN
 403
 404static const uint32_t tokeninfo[] = {
 405        0,
 406        0,
 407        OC_REGEXP,
 408        xS|'a',                  xS|'w',                  xS|'|',
 409        OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
 410        OC_UNARY|xV|P(9)|'P',    OC_UNARY|xV|P(9)|'M',    OC_FIELD|xV|P(5),
 411        OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
 412        OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
 413        OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
 414        OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
 415        OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
 416        OC_COMPARE|VV|P(39)|2,   OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
 417        OC_LOR|Vx|P(59),         OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
 418        OC_IN|SV|P(49), /* TC_IN */
 419        OC_COMMA|SS|P(80),
 420        OC_PGETLINE|SV|P(37),
 421        OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
 422        0, /* ] */
 423        0,
 424        0,
 425        0,
 426        0, /* \n */
 427        ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
 428        OC_CONTINUE,  OC_DELETE|Vx, OC_PRINT,
 429        OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
 430        OC_RETURN|Vx, OC_EXIT|Nx,
 431        ST_WHILE,
 432        0, /* else */
 433        OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
 434        OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
 435        OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
 436        OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
 437        OC_FBLTIN|F_rn,    OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
 438        OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
 439        OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF,        OC_B|B_su|P(0xb6),
 440        OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti,    OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
 441        OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
 442        OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
 443        OC_GETLINE|SV|P(0),
 444        0,                 0,
 445        0,
 446        0 /* TC_END */
 447};
 448
 449/* internal variable names and their initial values       */
 450/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
 451enum {
 452        CONVFMT,    OFMT,       FS,         OFS,
 453        ORS,        RS,         RT,         FILENAME,
 454        SUBSEP,     F0,         ARGIND,     ARGC,
 455        ARGV,       ERRNO,      FNR,        NR,
 456        NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
 457};
 458
 459static const char vNames[] ALIGN1 =
 460        "CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
 461        "ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
 462        "SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
 463        "ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
 464        "NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
 465
 466static const char vValues[] ALIGN1 =
 467        "%.6g\0"    "%.6g\0"    " \0"       " \0"
 468        "\n\0"      "\n\0"      "\0"        "\0"
 469        "\034\0"    "\0"        "\377";
 470
 471/* hash size may grow to these values */
 472#define FIRST_PRIME 61
 473static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
 474
 475
 476/* Globals. Split in two parts so that first one is addressed
 477 * with (mostly short) negative offsets.
 478 * NB: it's unsafe to put members of type "double"
 479 * into globals2 (gcc may fail to align them).
 480 */
 481struct globals {
 482        double t_double;
 483        chain beginseq, mainseq, endseq;
 484        chain *seq;
 485        node *break_ptr, *continue_ptr;
 486        rstream *iF;
 487        xhash *vhash, *ahash, *fdhash, *fnhash;
 488        const char *g_progname;
 489        int g_lineno;
 490        int nfields;
 491        int maxfields; /* used in fsrealloc() only */
 492        var *Fields;
 493        nvblock *g_cb;
 494        char *g_pos;
 495        char *g_buf;
 496        smallint icase;
 497        smallint exiting;
 498        smallint nextrec;
 499        smallint nextfile;
 500        smallint is_f0_split;
 501        smallint t_rollback;
 502};
 503struct globals2 {
 504        uint32_t t_info; /* often used */
 505        uint32_t t_tclass;
 506        char *t_string;
 507        int t_lineno;
 508
 509        var *intvar[NUM_INTERNAL_VARS]; /* often used */
 510
 511        /* former statics from various functions */
 512        char *split_f0__fstrings;
 513
 514        uint32_t next_token__save_tclass;
 515        uint32_t next_token__save_info;
 516        uint32_t next_token__ltclass;
 517        smallint next_token__concat_inserted;
 518
 519        smallint next_input_file__files_happen;
 520        rstream next_input_file__rsm;
 521
 522        var *evaluate__fnargs;
 523        unsigned evaluate__seed;
 524        regex_t evaluate__sreg;
 525
 526        var ptest__v;
 527
 528        tsplitter exec_builtin__tspl;
 529
 530        /* biggest and least used members go last */
 531        tsplitter fsplitter, rsplitter;
 532};
 533#define G1 (ptr_to_globals[-1])
 534#define G (*(struct globals2 *)ptr_to_globals)
 535/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
 536/*char G1size[sizeof(G1)]; - 0x74 */
 537/*char Gsize[sizeof(G)]; - 0x1c4 */
 538/* Trying to keep most of members accessible with short offsets: */
 539/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
 540#define t_double     (G1.t_double    )
 541#define beginseq     (G1.beginseq    )
 542#define mainseq      (G1.mainseq     )
 543#define endseq       (G1.endseq      )
 544#define seq          (G1.seq         )
 545#define break_ptr    (G1.break_ptr   )
 546#define continue_ptr (G1.continue_ptr)
 547#define iF           (G1.iF          )
 548#define vhash        (G1.vhash       )
 549#define ahash        (G1.ahash       )
 550#define fdhash       (G1.fdhash      )
 551#define fnhash       (G1.fnhash      )
 552#define g_progname   (G1.g_progname  )
 553#define g_lineno     (G1.g_lineno    )
 554#define nfields      (G1.nfields     )
 555#define maxfields    (G1.maxfields   )
 556#define Fields       (G1.Fields      )
 557#define g_cb         (G1.g_cb        )
 558#define g_pos        (G1.g_pos       )
 559#define g_buf        (G1.g_buf       )
 560#define icase        (G1.icase       )
 561#define exiting      (G1.exiting     )
 562#define nextrec      (G1.nextrec     )
 563#define nextfile     (G1.nextfile    )
 564#define is_f0_split  (G1.is_f0_split )
 565#define t_rollback   (G1.t_rollback  )
 566#define t_info       (G.t_info      )
 567#define t_tclass     (G.t_tclass    )
 568#define t_string     (G.t_string    )
 569#define t_lineno     (G.t_lineno    )
 570#define intvar       (G.intvar      )
 571#define fsplitter    (G.fsplitter   )
 572#define rsplitter    (G.rsplitter   )
 573#define INIT_G() do { \
 574        SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
 575        G.next_token__ltclass = TC_OPTERM; \
 576        G.evaluate__seed = 1; \
 577} while (0)
 578
 579
 580/* function prototypes */
 581static void handle_special(var *);
 582static node *parse_expr(uint32_t);
 583static void chain_group(void);
 584static var *evaluate(node *, var *);
 585static rstream *next_input_file(void);
 586static int fmt_num(char *, int, const char *, double, int);
 587static int awk_exit(int) NORETURN;
 588
 589/* ---- error handling ---- */
 590
 591static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
 592static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
 593static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
 594static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
 595static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
 596static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
 597static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
 598static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
 599static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
 600static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
 601static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
 602
 603static void zero_out_var(var *vp)
 604{
 605        memset(vp, 0, sizeof(*vp));
 606}
 607
 608static void syntax_error(const char *message) NORETURN;
 609static void syntax_error(const char *message)
 610{
 611        bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
 612}
 613
 614/* ---- hash stuff ---- */
 615
 616static unsigned hashidx(const char *name)
 617{
 618        unsigned idx = 0;
 619
 620        while (*name)
 621                idx = *name++ + (idx << 6) - idx;
 622        return idx;
 623}
 624
 625/* create new hash */
 626static xhash *hash_init(void)
 627{
 628        xhash *newhash;
 629
 630        newhash = xzalloc(sizeof(*newhash));
 631        newhash->csize = FIRST_PRIME;
 632        newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
 633
 634        return newhash;
 635}
 636
 637/* find item in hash, return ptr to data, NULL if not found */
 638static void *hash_search(xhash *hash, const char *name)
 639{
 640        hash_item *hi;
 641
 642        hi = hash->items[hashidx(name) % hash->csize];
 643        while (hi) {
 644                if (strcmp(hi->name, name) == 0)
 645                        return &hi->data;
 646                hi = hi->next;
 647        }
 648        return NULL;
 649}
 650
 651/* grow hash if it becomes too big */
 652static void hash_rebuild(xhash *hash)
 653{
 654        unsigned newsize, i, idx;
 655        hash_item **newitems, *hi, *thi;
 656
 657        if (hash->nprime == ARRAY_SIZE(PRIMES))
 658                return;
 659
 660        newsize = PRIMES[hash->nprime++];
 661        newitems = xzalloc(newsize * sizeof(newitems[0]));
 662
 663        for (i = 0; i < hash->csize; i++) {
 664                hi = hash->items[i];
 665                while (hi) {
 666                        thi = hi;
 667                        hi = thi->next;
 668                        idx = hashidx(thi->name) % newsize;
 669                        thi->next = newitems[idx];
 670                        newitems[idx] = thi;
 671                }
 672        }
 673
 674        free(hash->items);
 675        hash->csize = newsize;
 676        hash->items = newitems;
 677}
 678
 679/* find item in hash, add it if necessary. Return ptr to data */
 680static void *hash_find(xhash *hash, const char *name)
 681{
 682        hash_item *hi;
 683        unsigned idx;
 684        int l;
 685
 686        hi = hash_search(hash, name);
 687        if (!hi) {
 688                if (++hash->nel / hash->csize > 10)
 689                        hash_rebuild(hash);
 690
 691                l = strlen(name) + 1;
 692                hi = xzalloc(sizeof(*hi) + l);
 693                strcpy(hi->name, name);
 694
 695                idx = hashidx(name) % hash->csize;
 696                hi->next = hash->items[idx];
 697                hash->items[idx] = hi;
 698                hash->glen += l;
 699        }
 700        return &hi->data;
 701}
 702
  703#define findvar(hash, name) ((var*)    hash_find((hash), (name))) /* find-or-create a var in the given hash */
  704#define newvar(name)        ((var*)    hash_find(vhash, (name))) /* find-or-create a var in vhash */
  705#define newfile(name)       ((rstream*)hash_find(fdhash, (name))) /* find-or-create a stream entry in fdhash */
  706#define newfunc(name)       ((func*)   hash_find(fnhash, (name))) /* find-or-create a function entry in fnhash */
 707
 708static void hash_remove(xhash *hash, const char *name)
 709{
 710        hash_item *hi, **phi;
 711
 712        phi = &hash->items[hashidx(name) % hash->csize];
 713        while (*phi) {
 714                hi = *phi;
 715                if (strcmp(hi->name, name) == 0) {
 716                        hash->glen -= (strlen(name) + 1);
 717                        hash->nel--;
 718                        *phi = hi->next;
 719                        free(hi);
 720                        break;
 721                }
 722                phi = &hi->next;
 723        }
 724}
 725
 726/* ------ some useful functions ------ */
 727
 728static char *skip_spaces(char *p)
 729{
 730        while (1) {
 731                if (*p == '\\' && p[1] == '\n') {
 732                        p++;
 733                        t_lineno++;
 734                } else if (*p != ' ' && *p != '\t') {
 735                        break;
 736                }
 737                p++;
 738        }
 739        return p;
 740}
 741
 742/* returns old *s, advances *s past word and terminating NUL */
 743static char *nextword(char **s)
 744{
 745        char *p = *s;
 746        while (*(*s)++ != '\0')
 747                continue;
 748        return p;
 749}
 750
 751static char nextchar(char **s)
 752{
 753        char c, *pps;
 754
 755        c = *(*s)++;
 756        pps = *s;
 757        if (c == '\\')
 758                c = bb_process_escape_sequence((const char**)s);
 759        /* Example awk statement:
 760         * s = "abc\"def"
 761         * we must treat \" as "
 762         */
 763        if (c == '\\' && *s == pps) { /* unrecognized \z? */
 764                c = *(*s); /* yes, fetch z */
 765                if (c)
 766                        (*s)++; /* advance unless z = NUL */
 767        }
 768        return c;
 769}
 770
 771/* TODO: merge with strcpy_and_process_escape_sequences()?
 772 */
 773static void unescape_string_in_place(char *s1)
 774{
 775        char *s = s1;
 776        while ((*s1 = nextchar(&s)) != '\0')
 777                s1++;
 778}
 779
 780static ALWAYS_INLINE int isalnum_(int c)
 781{
 782        return (isalnum(c) || c == '_');
 783}
 784
 785static double my_strtod(char **pp)
 786{
 787        char *cp = *pp;
 788        if (ENABLE_DESKTOP && cp[0] == '0') {
 789                /* Might be hex or octal integer: 0x123abc or 07777 */
 790                char c = (cp[1] | 0x20);
 791                if (c == 'x' || isdigit(cp[1])) {
 792                        unsigned long long ull = strtoull(cp, pp, 0);
 793                        if (c == 'x')
 794                                return ull;
 795                        c = **pp;
 796                        if (!isdigit(c) && c != '.')
 797                                return ull;
 798                        /* else: it may be a floating number. Examples:
 799                         * 009.123 (*pp points to '9')
 800                         * 000.123 (*pp points to '.')
 801                         * fall through to strtod.
 802                         */
 803                }
 804        }
 805        return strtod(cp, pp);
 806}
 807
 808/* -------- working with variables (set/get/copy/etc) -------- */
 809
 810static xhash *iamarray(var *v)
 811{
 812        var *a = v;
 813
 814        while (a->type & VF_CHILD)
 815                a = a->x.parent;
 816
 817        if (!(a->type & VF_ARRAY)) {
 818                a->type |= VF_ARRAY;
 819                a->x.array = hash_init();
 820        }
 821        return a->x.array;
 822}
 823
 824static void clear_array(xhash *array)
 825{
 826        unsigned i;
 827        hash_item *hi, *thi;
 828
 829        for (i = 0; i < array->csize; i++) {
 830                hi = array->items[i];
 831                while (hi) {
 832                        thi = hi;
 833                        hi = hi->next;
 834                        free(thi->data.v.string);
 835                        free(thi);
 836                }
 837                array->items[i] = NULL;
 838        }
 839        array->glen = array->nel = 0;
 840}
 841
 842/* clear a variable */
 843static var *clrvar(var *v)
 844{
 845        if (!(v->type & VF_FSTR))
 846                free(v->string);
 847
 848        v->type &= VF_DONTTOUCH;
 849        v->type |= VF_DIRTY;
 850        v->string = NULL;
 851        return v;
 852}
 853
 854/* assign string value to variable */
 855static var *setvar_p(var *v, char *value)
 856{
 857        clrvar(v);
 858        v->string = value;
 859        handle_special(v);
 860        return v;
 861}
 862
 863/* same as setvar_p but make a copy of string */
 864static var *setvar_s(var *v, const char *value)
 865{
 866        return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
 867}
 868
 869/* same as setvar_s but sets USER flag */
 870static var *setvar_u(var *v, const char *value)
 871{
 872        v = setvar_s(v, value);
 873        v->type |= VF_USER;
 874        return v;
 875}
 876
 877/* set array element to user string */
 878static void setari_u(var *a, int idx, const char *s)
 879{
 880        var *v;
 881
 882        v = findvar(iamarray(a), itoa(idx));
 883        setvar_u(v, s);
 884}
 885
 886/* assign numeric value to variable */
 887static var *setvar_i(var *v, double value)
 888{
 889        clrvar(v);
 890        v->type |= VF_NUMBER;
 891        v->number = value;
 892        handle_special(v);
 893        return v;
 894}
 895
 896static const char *getvar_s(var *v)
 897{
 898        /* if v is numeric and has no cached string, convert it to string */
 899        if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
 900                fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
 901                v->string = xstrdup(g_buf);
 902                v->type |= VF_CACHED;
 903        }
 904        return (v->string == NULL) ? "" : v->string;
 905}
 906
 907static double getvar_i(var *v)
 908{
 909        char *s;
 910
 911        if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
 912                v->number = 0;
 913                s = v->string;
 914                if (s && *s) {
 915                        debug_printf_eval("getvar_i: '%s'->", s);
 916                        v->number = my_strtod(&s);
 917                        debug_printf_eval("%f (s:'%s')\n", v->number, s);
 918                        if (v->type & VF_USER) {
 919                                s = skip_spaces(s);
 920                                if (*s != '\0')
 921                                        v->type &= ~VF_USER;
 922                        }
 923                } else {
 924                        debug_printf_eval("getvar_i: '%s'->zero\n", s);
 925                        v->type &= ~VF_USER;
 926                }
 927                v->type |= VF_CACHED;
 928        }
 929        debug_printf_eval("getvar_i: %f\n", v->number);
 930        return v->number;
 931}
 932
 933/* Used for operands of bitwise ops */
 934static unsigned long getvar_i_int(var *v)
 935{
 936        double d = getvar_i(v);
 937
 938        /* Casting doubles to longs is undefined for values outside
 939         * of target type range. Try to widen it as much as possible */
 940        if (d >= 0)
 941                return (unsigned long)d;
 942        /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
 943        return - (long) (unsigned long) (-d);
 944}
 945
 946static var *copyvar(var *dest, const var *src)
 947{
 948        if (dest != src) {
 949                clrvar(dest);
 950                dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
 951                debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
 952                dest->number = src->number;
 953                if (src->string)
 954                        dest->string = xstrdup(src->string);
 955        }
 956        handle_special(dest);
 957        return dest;
 958}
 959
 960static var *incvar(var *v)
 961{
 962        return setvar_i(v, getvar_i(v) + 1.0);
 963}
 964
 965/* return true if v is number or numeric string */
 966static int is_numeric(var *v)
 967{
 968        getvar_i(v);
 969        return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
 970}
 971
 972/* return 1 when value of v corresponds to true, 0 otherwise */
 973static int istrue(var *v)
 974{
 975        if (is_numeric(v))
 976                return (v->number != 0);
 977        return (v->string && v->string[0]);
 978}
 979
 980/* temporary variables allocator. Last allocated should be first freed */
 981static var *nvalloc(int n)
 982{
 983        nvblock *pb = NULL;
 984        var *v, *r;
 985        int size;
 986
 987        while (g_cb) {
 988                pb = g_cb;
 989                if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
 990                        break;
 991                g_cb = g_cb->next;
 992        }
 993
 994        if (!g_cb) {
 995                size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
 996                g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
 997                g_cb->size = size;
 998                g_cb->pos = g_cb->nv;
 999                g_cb->prev = pb;
1000                /*g_cb->next = NULL; - xzalloc did it */
1001                if (pb)
1002                        pb->next = g_cb;
1003        }
1004
1005        v = r = g_cb->pos;
1006        g_cb->pos += n;
1007
1008        while (v < g_cb->pos) {
1009                v->type = 0;
1010                v->string = NULL;
1011                v++;
1012        }
1013
1014        return r;
1015}
1016
1017static void nvfree(var *v)
1018{
1019        var *p;
1020
1021        if (v < g_cb->nv || v >= g_cb->pos)
1022                syntax_error(EMSG_INTERNAL_ERROR);
1023
1024        for (p = v; p < g_cb->pos; p++) {
1025                if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1026                        clear_array(iamarray(p));
1027                        free(p->x.array->items);
1028                        free(p->x.array);
1029                }
1030                if (p->type & VF_WALK) {
1031                        walker_list *n;
1032                        walker_list *w = p->x.walker;
1033                        debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1034                        p->x.walker = NULL;
1035                        while (w) {
1036                                n = w->prev;
1037                                debug_printf_walker(" free(%p)\n", w);
1038                                free(w);
1039                                w = n;
1040                        }
1041                }
1042                clrvar(p);
1043        }
1044
1045        g_cb->pos = v;
1046        while (g_cb->prev && g_cb->pos == g_cb->nv) {
1047                g_cb = g_cb->prev;
1048        }
1049}
1050
1051/* ------- awk program text parsing ------- */
1052
1053/* Parse next token pointed by global pos, place results into global ttt.
1054 * If token isn't expected, give away. Return token class
1055 */
/* Tokenizer details:
 *
 * expected - bitmask of TC_* token classes that are acceptable here.
 *
 * Returns the class of the token; the same value is stored in t_tclass
 * and remembered in ltclass for the next call. Token details are left in
 * t_info (tokens found in tokenlist), t_string (strings, regexps, names)
 * or t_double (numbers). If the class is not among 'expected',
 * syntax_error() is called.
 *
 * The program text is modified in place: strings and regexps are
 * unescaped into the very buffer they are read from, and names are moved
 * one byte to the left to make room for a terminating NUL.
 */
1056static uint32_t next_token(uint32_t expected)
1057{
1058#define concat_inserted (G.next_token__concat_inserted)
1059#define save_tclass     (G.next_token__save_tclass)
1060#define save_info       (G.next_token__save_info)
1061/* Initialized to TC_OPTERM: */
1062#define ltclass         (G.next_token__ltclass)
1063
1064        char *p, *s;
1065        const char *tl;
1066        uint32_t tc;
1067        const uint32_t *ti;
1068
1069        if (t_rollback) {
            /* rollback_token() was called: hand out the previous token
             * once more (t_tclass and friends are left as they are) */
1070                t_rollback = FALSE;
1071        } else if (concat_inserted) {
            /* the previous call returned a synthesized concatenation
             * operator; now deliver the real token saved at that time */
1072                concat_inserted = FALSE;
1073                t_tclass = save_tclass;
1074                t_info = save_info;
1075        } else {
1076                p = g_pos;
1077 readnext:
1078                p = skip_spaces(p);
                    /* g_lineno is what new_node() records in node->lineno */
1079                g_lineno = t_lineno;
                    /* a comment runs up to (not including) the end of line */
1080                if (*p == '#')
1081                        while (*p != '\n' && *p != '\0')
1082                                p++;
1083
1084                if (*p == '\n')
1085                        t_lineno++;
1086
1087                if (*p == '\0') {
1088                        tc = TC_EOF;
1089                        debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1090                } else if (*p == '\"') {
1091                        /* it's a string */
                            /* unescaped in place: s is the write position,
                             * p the read position */
1092                        t_string = s = ++p;
1093                        while (*p != '\"') {
1094                                char *pp;
1095                                if (*p == '\0' || *p == '\n')
1096                                        syntax_error(EMSG_UNEXP_EOS);
1097                                pp = p;
1098                                *s++ = nextchar(&pp);
1099                                p = pp;
1100                        }
1101                        p++;
1102                        *s = '\0';
1103                        tc = TC_STRING;
1104                        debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1105                } else if ((expected & TC_REGEXP) && *p == '/') {
1106                        /* it's regexp */
                            /* '/' starts a regexp only if the caller expects one.
                             * The text is copied in place up to the closing '/'.
                             * After a backslash, bb_process_escape_sequence() is
                             * tried: if it consumed something (pp != p), its result
                             * replaces the backslash; otherwise the character after
                             * the backslash is copied as is. A doubled backslash
                             * additionally gets one backslash re-added after the
                             * decoded one.
                             * NOTE(review): if the backslash is the last character
                             * before the terminating NUL, "*s++ = *p++" looks like
                             * it steps p past the NUL, so the end-of-string check
                             * at the loop top would examine bytes beyond the
                             * program text - TODO confirm what
                             * bb_process_escape_sequence() does for that input. */
1107                        t_string = s = ++p;
1108                        while (*p != '/') {
1109                                if (*p == '\0' || *p == '\n')
1110                                        syntax_error(EMSG_UNEXP_EOS);
1111                                *s = *p++;
1112                                if (*s++ == '\\') {
1113                                        char *pp = p;
1114                                        s[-1] = bb_process_escape_sequence((const char **)&pp);
1115                                        if (*p == '\\')
1116                                                *s++ = '\\';
1117                                        if (pp == p)
1118                                                *s++ = *p++;
1119                                        else
1120                                                p = pp;
1121                                }
1122                        }
1123                        p++;
1124                        *s = '\0';
1125                        tc = TC_REGEXP;
1126                        debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1127
1128                } else if (*p == '.' || isdigit(*p)) {
1129                        /* it's a number */
1130                        char *pp = p;
1131                        t_double = my_strtod(&pp);
1132                        p = pp;
                            /* a '.' right after the number (as in 1.2.3) is an error */
1133                        if (*p == '.')
1134                                syntax_error(EMSG_UNEXP_TOKEN);
1135                        tc = TC_NUMBER;
1136                        debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1137                } else {
1138                        /* search for something known */
                            /* tokenlist is walked as a sequence of length-prefixed
                             * strings; a length byte equal to NTCC switches to the
                             * next token class (tc is shifted left by one bit).
                             * ti advances through tokeninfo in step with the
                             * strings. */
1139                        tl = tokenlist;
1140                        tc = 0x00000001;
1141                        ti = tokeninfo;
1142                        while (*tl) {
1143                                int l = (unsigned char) *tl++;
1144                                if (l == (unsigned char) NTCC) {
1145                                        tc <<= 1;
1146                                        continue;
1147                                }
1148                                /* if token class is expected,
1149                                 * token matches,
1150                                 * and it's not a longer word,
1151                                 */
1152                                if ((tc & (expected | TC_WORD | TC_NEWLINE))
1153                                 && strncmp(p, tl, l) == 0
1154                                 && !((tc & TC_WORD) && isalnum_(p[l]))
1155                                ) {
1156                                        /* then this is what we are looking for */
1157                                        t_info = *ti;
1158                                        debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1159                                        p += l;
1160                                        goto token_found;
1161                                }
1162                                ti++;
1163                                tl += l;
1164                        }
1165                        /* not a known token */
1166
1167                        /* is it a name? (var/array/function) */
1168                        if (!isalnum_(*p))
1169                                syntax_error(EMSG_UNEXP_TOKEN); /* no */
1170                        /* yes */
                            /* Move the name one byte to the left, so that the NUL
                             * terminator can be stored without destroying the
                             * character that follows the name. This overwrites
                             * the byte just before the name. */
1171                        t_string = --p;
1172                        while (isalnum_(*++p)) {
1173                                p[-1] = *p;
1174                        }
1175                        p[-1] = '\0';
1176                        tc = TC_VARIABLE;
1177                        /* also consume whitespace between functionname and bracket */
1178                        if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1179                                p = skip_spaces(p);
                            /* '(' is left unconsumed for TC_FUNCTION,
                             * '[' is consumed for TC_ARRAY */
1180                        if (*p == '(') {
1181                                tc = TC_FUNCTION;
1182                                debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1183                        } else {
1184                                if (*p == '[') {
1185                                        p++;
1186                                        tc = TC_ARRAY;
1187                                        debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1188                                } else
1189                                        debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1190                        }
1191                }
1192 token_found:
1193                g_pos = p;
1194
1195                /* skipping newlines in some cases */
1196                if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1197                        goto readnext;
1198
1199                /* insert concatenation operator when needed */
                    /* the token just read is saved and will be returned
                     * by the next call (see concat_inserted above) */
1200                if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1201                        concat_inserted = TRUE;
1202                        save_tclass = tc;
1203                        save_info = t_info;
1204                        tc = TC_BINOP;
1205                        t_info = OC_CONCAT | SS | P(35);
1206                }
1207
1208                t_tclass = tc;
1209        }
1210        ltclass = t_tclass;
1211
1212        /* Are we ready for this? */
1213        if (!(ltclass & expected)) {
1214                syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1215                                EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1216        }
1217
1218        return ltclass;
1219#undef concat_inserted
1220#undef save_tclass
1221#undef save_info
1222#undef ltclass
1223}
1224
1225static void rollback_token(void)
1226{
1227        t_rollback = TRUE;
1228}
1229
1230static node *new_node(uint32_t info)
1231{
1232        node *n;
1233
1234        n = xzalloc(sizeof(node));
1235        n->info = info;
1236        n->lineno = g_lineno;
1237        return n;
1238}
1239
1240static void mk_re_node(const char *s, node *n, regex_t *re)
1241{
1242        n->info = OC_REGEXP;
1243        n->l.re = re;
1244        n->r.ire = re + 1;
1245        xregcomp(re, s, REG_EXTENDED);
1246        xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1247}
1248
1249static node *condition(void)
1250{
1251        next_token(TC_SEQSTART);
1252        return parse_expr(TC_SEQTERM);
1253}
1254
1255/* parse expression terminated by given argument, return ptr
1256 * to built subtree. Terminator is eaten by parse_expr */
/* Implementation notes:
 *
 * iexp - bitmask of token classes that terminate the expression.
 * Returns the root of the subtree, or NULL if the terminator was met
 * before anything else (sn.r.n is never set in that case).
 *
 * sn is a dummy root living on the stack; the real tree hangs off sn.r.n.
 * cn is the node added last, and the a.n link of every node built here
 * points to its parent. xtc is the set of token classes that may follow
 * the current token. glptr remembers a getline node to which a following
 * '<' is attached as input redirection instead of a comparison.
 */
1257static node *parse_expr(uint32_t iexp)
1258{
1259        node sn;
1260        node *cn = &sn;
1261        node *vn, *glptr;
1262        uint32_t tc, xtc;
1263        var *v;
1264
1265        debug_printf_parse("%s(%x)\n", __func__, iexp);
1266
1267        sn.info = PRIMASK;
1268        sn.r.n = glptr = NULL;
1269        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1270
1271        while (!((tc = next_token(xtc)) & iexp)) {
1272
1273                if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1274                        /* input redirection (<) attached to glptr node */
1275                        debug_printf_parse("%s: input redir\n", __func__);
1276                        cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1277                        cn->a.n = glptr;
1278                        xtc = TC_OPERAND | TC_UOPPRE;
1279                        glptr = NULL;
1280
1281                } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1282                        debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1283                        /* for binary and postfix-unary operators, jump back over
1284                         * previous operators with higher priority */
1285                        vn = cn;
1286                        while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1287                            || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1288                        ) {
1289                                vn = vn->a.n;
1290                        }
1291                        if ((t_info & OPCLSMASK) == OC_TERNARY)
1292                                t_info += P(6);
                            /* the new operator node takes the place of vn as right
                             * child of vn's parent; vn becomes its operand (left one
                             * for a binary operator, right one for a postfix one) */
1293                        cn = vn->a.n->r.n = new_node(t_info);
1294                        cn->a.n = vn->a.n;
1295                        if (tc & TC_BINOP) {
1296                                cn->l.n = vn;
1297                                xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1298                                if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1299                                        /* it's a pipe */
1300                                        next_token(TC_GETLINE);
1301                                        /* give maximum priority to this pipe */
1302                                        cn->info &= ~PRIMASK;
1303                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1304                                }
1305                        } else {
1306                                cn->r.n = vn;
1307                                xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1308                        }
1309                        vn->a.n = cn;
1310
1311                } else {
1312                        debug_printf_parse("%s: other\n", __func__);
1313                        /* for operands and prefix-unary operators, attach them
1314                         * to last node */
1315                        vn = cn;
1316                        cn = vn->r.n = new_node(t_info);
1317                        cn->a.n = vn;
1318                        xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1319                        if (tc & (TC_OPERAND | TC_REGEXP)) {
1320                                debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1321                                xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1322                                /* one should be very careful with switch on tclass -
1323                                 * only simple tclasses should be used! */
1324                                switch (tc) {
1325                                case TC_VARIABLE:
1326                                case TC_ARRAY:
1327                                        debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1328                                        cn->info = OC_VAR;
                                            /* a name found in ahash is referenced by its
                                             * index (OC_FNARG); any other name goes
                                             * through newvar() */
1329                                        v = hash_search(ahash, t_string);
1330                                        if (v != NULL) {
1331                                                cn->info = OC_FNARG;
1332                                                cn->l.aidx = v->x.aidx;
1333                                        } else {
1334                                                cn->l.v = newvar(t_string);
1335                                        }
1336                                        if (tc & TC_ARRAY) {
                                                    /* subscript: parsed recursively up to
                                                     * the TC_ARRTERM terminator */
1337                                                cn->info |= xS;
1338                                                cn->r.n = parse_expr(TC_ARRTERM);
1339                                        }
1340                                        break;
1341
1342                                case TC_NUMBER:
1343                                case TC_STRING:
1344                                        debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
                                            /* a constant becomes an anonymous variable
                                             * holding the value */
1345                                        cn->info = OC_VAR;
1346                                        v = cn->l.v = xzalloc(sizeof(var));
1347                                        if (tc & TC_NUMBER)
1348                                                setvar_i(v, t_double);
1349                                        else
1350                                                setvar_s(v, t_string);
1351                                        break;
1352
1353                                case TC_REGEXP:
1354                                        debug_printf_parse("%s: TC_REGEXP\n", __func__);
                                            /* two regex_t: mk_re_node() compiles a plain and
                                             * a case-insensitive variant */
1355                                        mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1356                                        break;
1357
1358                                case TC_FUNCTION:
1359                                        debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1360                                        cn->info = OC_FUNC;
1361                                        cn->r.f = newfunc(t_string);
1362                                        cn->l.n = condition();
1363                                        break;
1364
1365                                case TC_SEQSTART:
1366                                        debug_printf_parse("%s: TC_SEQSTART\n", __func__);
                                            /* parenthesized subexpression: its subtree
                                             * replaces the node allocated above */
1367                                        cn = vn->r.n = parse_expr(TC_SEQTERM);
1368                                        if (!cn)
1369                                                syntax_error("Empty sequence");
1370                                        cn->a.n = vn;
1371                                        break;
1372
1373                                case TC_GETLINE:
1374                                        debug_printf_parse("%s: TC_GETLINE\n", __func__);
1375                                        glptr = cn;
1376                                        xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1377                                        break;
1378
1379                                case TC_BUILTIN:
1380                                        debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1381                                        cn->l.n = condition();
1382                                        break;
1383
1384                                case TC_LENGTH:
1385                                        debug_printf_parse("%s: TC_LENGTH\n", __func__);
                                            /* peek at the next token: an argument list
                                             * is parsed only if a "(" follows */
1386                                        next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
1387                                        rollback_token();
1388                                        if (t_tclass & TC_SEQSTART) {
1389                                                /* It was a "(" token. Handle just like TC_BUILTIN */
1390                                                cn->l.n = condition();
1391                                        }
1392                                        break;
1393                                }
1394                        }
1395                }
1396        }
1397
1398        debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1399        return sn.r.n;
1400}
1401
1402/* add node to chain. Return ptr to alloc'd node */
1403static node *chain_node(uint32_t info)
1404{
1405        node *n;
1406
1407        if (!seq->first)
1408                seq->first = seq->last = new_node(0);
1409
1410        if (seq->programname != g_progname) {
1411                seq->programname = g_progname;
1412                n = chain_node(OC_NEWSOURCE);
1413                n->l.new_progname = xstrdup(g_progname);
1414        }
1415
1416        n = seq->last;
1417        n->info = info;
1418        seq->last = n->a.n = new_node(OC_DONE);
1419
1420        return n;
1421}
1422
1423static void chain_expr(uint32_t info)
1424{
1425        node *n;
1426
1427        n = chain_node(info);
1428        n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1429        if (t_tclass & TC_GRPTERM)
1430                rollback_token();
1431}
1432
1433static node *chain_loop(node *nn)
1434{
1435        node *n, *n2, *save_brk, *save_cont;
1436
1437        save_brk = break_ptr;
1438        save_cont = continue_ptr;
1439
1440        n = chain_node(OC_BR | Vx);
1441        continue_ptr = new_node(OC_EXEC);
1442        break_ptr = new_node(OC_EXEC);
1443        chain_group();
1444        n2 = chain_node(OC_EXEC | Vx);
1445        n2->l.n = nn;
1446        n2->a.n = n;
1447        continue_ptr->a.n = n2;
1448        break_ptr->a.n = n->r.n = seq->last;
1449
1450        continue_ptr = save_cont;
1451        break_ptr = save_brk;
1452
1453        return n;
1454}
1455
1456/* parse group and attach it to chain */
1457static void chain_group(void)
1458{
1459        uint32_t c;
1460        node *n, *n2, *n3;
1461
1462        do {
1463                c = next_token(TC_GRPSEQ);
1464        } while (c & TC_NEWLINE);
1465
1466        if (c & TC_GRPSTART) {
1467                debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1468                while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1469                        debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1470                        if (t_tclass & TC_NEWLINE)
1471                                continue;
1472                        rollback_token();
1473                        chain_group();
1474                }
1475                debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1476        } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1477                debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1478                rollback_token();
1479                chain_expr(OC_EXEC | Vx);
1480        } else {
1481                /* TC_STATEMNT */
1482                debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1483                switch (t_info & OPCLSMASK) {
1484                case ST_IF:
1485                        debug_printf_parse("%s: ST_IF\n", __func__);
1486                        n = chain_node(OC_BR | Vx);
1487                        n->l.n = condition();
1488                        chain_group();
1489                        n2 = chain_node(OC_EXEC);
1490                        n->r.n = seq->last;
1491                        if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1492                                chain_group();
1493                                n2->a.n = seq->last;
1494                        } else {
1495                                rollback_token();
1496                        }
1497                        break;
1498
1499                case ST_WHILE:
1500                        debug_printf_parse("%s: ST_WHILE\n", __func__);
1501                        n2 = condition();
1502                        n = chain_loop(NULL);
1503                        n->l.n = n2;
1504                        break;
1505
1506                case ST_DO:
1507                        debug_printf_parse("%s: ST_DO\n", __func__);
1508                        n2 = chain_node(OC_EXEC);
1509                        n = chain_loop(NULL);
1510                        n2->a.n = n->a.n;
1511                        next_token(TC_WHILE);
1512                        n->l.n = condition();
1513                        break;
1514
1515                case ST_FOR:
1516                        debug_printf_parse("%s: ST_FOR\n", __func__);
1517                        next_token(TC_SEQSTART);
1518                        n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1519                        if (t_tclass & TC_SEQTERM) {    /* for-in */
1520                                if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
1521                                        syntax_error(EMSG_UNEXP_TOKEN);
1522                                n = chain_node(OC_WALKINIT | VV);
1523                                n->l.n = n2->l.n;
1524                                n->r.n = n2->r.n;
1525                                n = chain_loop(NULL);
1526                                n->info = OC_WALKNEXT | Vx;
1527                                n->l.n = n2->l.n;
1528                        } else {                        /* for (;;) */
1529                                n = chain_node(OC_EXEC | Vx);
1530                                n->l.n = n2;
1531                                n2 = parse_expr(TC_SEMICOL);
1532                                n3 = parse_expr(TC_SEQTERM);
1533                                n = chain_loop(n3);
1534                                n->l.n = n2;
1535                                if (!n2)
1536                                        n->info = OC_EXEC;
1537                        }
1538                        break;
1539
1540                case OC_PRINT:
1541                case OC_PRINTF:
1542                        debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1543                        n = chain_node(t_info);
1544                        n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1545                        if (t_tclass & TC_OUTRDR) {
1546                                n->info |= t_info;
1547                                n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1548                        }
1549                        if (t_tclass & TC_GRPTERM)
1550                                rollback_token();
1551                        break;
1552
1553                case OC_BREAK:
1554                        debug_printf_parse("%s: OC_BREAK\n", __func__);
1555                        n = chain_node(OC_EXEC);
1556                        n->a.n = break_ptr;
1557                        chain_expr(t_info);
1558                        break;
1559
1560                case OC_CONTINUE:
1561                        debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1562                        n = chain_node(OC_EXEC);
1563                        n->a.n = continue_ptr;
1564                        chain_expr(t_info);
1565                        break;
1566
1567                /* delete, next, nextfile, return, exit */
1568                default:
1569                        debug_printf_parse("%s: default\n", __func__);
1570                        chain_expr(t_info);
1571                }
1572        }
1573}
1574
1575static void parse_program(char *p)
1576{
1577        uint32_t tclass;
1578        node *cn;
1579        func *f;
1580        var *v;
1581
1582        g_pos = p;
1583        t_lineno = 1;
1584        while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1585                        TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1586
1587                if (tclass & TC_OPTERM) {
1588                        debug_printf_parse("%s: TC_OPTERM\n", __func__);
1589                        continue;
1590                }
1591
1592                seq = &mainseq;
1593                if (tclass & TC_BEGIN) {
1594                        debug_printf_parse("%s: TC_BEGIN\n", __func__);
1595                        seq = &beginseq;
1596                        chain_group();
1597                } else if (tclass & TC_END) {
1598                        debug_printf_parse("%s: TC_END\n", __func__);
1599                        seq = &endseq;
1600                        chain_group();
1601                } else if (tclass & TC_FUNCDECL) {
1602                        debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1603                        next_token(TC_FUNCTION);
1604                        g_pos++;
1605                        f = newfunc(t_string);
1606                        f->body.first = NULL;
1607                        f->nargs = 0;
1608                        while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1609                                v = findvar(ahash, t_string);
1610                                v->x.aidx = f->nargs++;
1611
1612                                if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1613                                        break;
1614                        }
1615                        seq = &f->body;
1616                        chain_group();
1617                        clear_array(ahash);
1618                } else if (tclass & TC_OPSEQ) {
1619                        debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1620                        rollback_token();
1621                        cn = chain_node(OC_TEST);
1622                        cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1623                        if (t_tclass & TC_GRPSTART) {
1624                                debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1625                                rollback_token();
1626                                chain_group();
1627                        } else {
1628                                debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1629                                chain_node(OC_PRINT);
1630                        }
1631                        cn->r.n = mainseq.last;
1632                } else /* if (tclass & TC_GRPSTART) */ {
1633                        debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1634                        rollback_token();
1635                        chain_group();
1636                }
1637        }
1638        debug_printf_parse("%s: TC_EOF\n", __func__);
1639}
1640
1641
1642/* -------- program execution part -------- */
1643
1644static node *mk_splitter(const char *s, tsplitter *spl)
1645{
1646        regex_t *re, *ire;
1647        node *n;
1648
1649        re = &spl->re[0];
1650        ire = &spl->re[1];
1651        n = &spl->n;
1652        if ((n->info & OPCLSMASK) == OC_REGEXP) {
1653                regfree(re);
1654                regfree(ire); // TODO: nuke ire, use re+1?
1655        }
1656        if (s[0] && s[1]) { /* strlen(s) > 1 */
1657                mk_re_node(s, n, re);
1658        } else {
1659                n->info = (uint32_t) s[0];
1660        }
1661
1662        return n;
1663}
1664
1665/* use node as a regular expression. Supplied with node ptr and regex_t
1666 * storage space. Return ptr to regex (if result points to preg, it should
1667 * be later regfree'd manually
1668 */
1669static regex_t *as_regex(node *op, regex_t *preg)
1670{
1671        int cflags;
1672        var *v;
1673        const char *s;
1674
1675        if ((op->info & OPCLSMASK) == OC_REGEXP) {
1676                return icase ? op->r.ire : op->l.re;
1677        }
1678        v = nvalloc(1);
1679        s = getvar_s(evaluate(op, v));
1680
1681        cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1682        /* Testcase where REG_EXTENDED fails (unpaired '{'):
1683         * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1684         * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1685         * (maybe gsub is not supposed to use REG_EXTENDED?).
1686         */
1687        if (regcomp(preg, s, cflags)) {
1688                cflags &= ~REG_EXTENDED;
1689                xregcomp(preg, s, cflags);
1690        }
1691        nvfree(v);
1692        return preg;
1693}
1694
/*
 * Gradually growing buffer.
 *
 * Returns b unchanged while it is non-NULL and n < *size. Otherwise the
 * buffer is (re)allocated to n + n/2 + 80 bytes and *size is updated.
 * Because reallocation happens even when n == *size, the buffer always
 * has at least one byte beyond n.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1707
1708/* resize field storage space */
1709static void fsrealloc(int size)
1710{
1711        int i;
1712
1713        if (size >= maxfields) {
1714                i = maxfields;
1715                maxfields = size + 16;
1716                Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1717                for (; i < maxfields; i++) {
1718                        Fields[i].type = VF_SPECIAL;
1719                        Fields[i].string = NULL;
1720                }
1721        }
1722        /* if size < nfields, clear extra field variables */
1723        for (i = size; i < nfields; i++) {
1724                clrvar(Fields + i);
1725        }
1726        nfields = size;
1727}
1728
1729static int awk_split(const char *s, node *spl, char **slist)
1730{
1731        int l, n;
1732        char c[4];
1733        char *s1;
1734        regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1735
1736        /* in worst case, each char would be a separate field */
1737        *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1738        strcpy(s1, s);
1739
1740        c[0] = c[1] = (char)spl->info;
1741        c[2] = c[3] = '\0';
1742        if (*getvar_s(intvar[RS]) == '\0')
1743                c[2] = '\n';
1744
1745        n = 0;
1746        if ((spl->info & OPCLSMASK) == OC_REGEXP) {  /* regex split */
1747                if (!*s)
1748                        return n; /* "": zero fields */
1749                n++; /* at least one field will be there */
1750                do {
1751                        l = strcspn(s, c+2); /* len till next NUL or \n */
1752                        if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1753                         && pmatch[0].rm_so <= l
1754                        ) {
1755                                l = pmatch[0].rm_so;
1756                                if (pmatch[0].rm_eo == 0) {
1757                                        l++;
1758                                        pmatch[0].rm_eo++;
1759                                }
1760                                n++; /* we saw yet another delimiter */
1761                        } else {
1762                                pmatch[0].rm_eo = l;
1763                                if (s[l])
1764                                        pmatch[0].rm_eo++;
1765                        }
1766                        memcpy(s1, s, l);
1767                        /* make sure we remove *all* of the separator chars */
1768                        do {
1769                                s1[l] = '\0';
1770                        } while (++l < pmatch[0].rm_eo);
1771                        nextword(&s1);
1772                        s += pmatch[0].rm_eo;
1773                } while (*s);
1774                return n;
1775        }
1776        if (c[0] == '\0') {  /* null split */
1777                while (*s) {
1778                        *s1++ = *s++;
1779                        *s1++ = '\0';
1780                        n++;
1781                }
1782                return n;
1783        }
1784        if (c[0] != ' ') {  /* single-character split */
1785                if (icase) {
1786                        c[0] = toupper(c[0]);
1787                        c[1] = tolower(c[1]);
1788                }
1789                if (*s1)
1790                        n++;
1791                while ((s1 = strpbrk(s1, c)) != NULL) {
1792                        *s1++ = '\0';
1793                        n++;
1794                }
1795                return n;
1796        }
1797        /* space split */
1798        while (*s) {
1799                s = skip_whitespace(s);
1800                if (!*s)
1801                        break;
1802                n++;
1803                while (*s && !isspace(*s))
1804                        *s1++ = *s++;
1805                *s1++ = '\0';
1806        }
1807        return n;
1808}
1809
1810static void split_f0(void)
1811{
1812/* static char *fstrings; */
1813#define fstrings (G.split_f0__fstrings)
1814
1815        int i, n;
1816        char *s;
1817
1818        if (is_f0_split)
1819                return;
1820
1821        is_f0_split = TRUE;
1822        free(fstrings);
1823        fsrealloc(0);
1824        n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1825        fsrealloc(n);
1826        s = fstrings;
1827        for (i = 0; i < n; i++) {
1828                Fields[i].string = nextword(&s);
1829                Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1830        }
1831
1832        /* set NF manually to avoid side effects */
1833        clrvar(intvar[NF]);
1834        intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1835        intvar[NF]->number = nfields;
1836#undef fstrings
1837}
1838
1839/* perform additional actions when some internal variables changed */
1840static void handle_special(var *v)
1841{
1842        int n;
1843        char *b;
1844        const char *sep, *s;
1845        int sl, l, len, i, bsize;
1846
1847        if (!(v->type & VF_SPECIAL))
1848                return;
1849
1850        if (v == intvar[NF]) {
1851                n = (int)getvar_i(v);
1852                if (n < 0)
1853                        syntax_error("NF set to negative value");
1854                fsrealloc(n);
1855
1856                /* recalculate $0 */
1857                sep = getvar_s(intvar[OFS]);
1858                sl = strlen(sep);
1859                b = NULL;
1860                len = 0;
1861                for (i = 0; i < n; i++) {
1862                        s = getvar_s(&Fields[i]);
1863                        l = strlen(s);
1864                        if (b) {
1865                                memcpy(b+len, sep, sl);
1866                                len += sl;
1867                        }
1868                        b = qrealloc(b, len+l+sl, &bsize);
1869                        memcpy(b+len, s, l);
1870                        len += l;
1871                }
1872                if (b)
1873                        b[len] = '\0';
1874                setvar_p(intvar[F0], b);
1875                is_f0_split = TRUE;
1876
1877        } else if (v == intvar[F0]) {
1878                is_f0_split = FALSE;
1879
1880        } else if (v == intvar[FS]) {
1881                /*
1882                 * The POSIX-2008 standard says that changing FS should have no effect on the
1883                 * current input line, but only on the next one. The language is:
1884                 *
1885                 * > Before the first reference to a field in the record is evaluated, the record
1886                 * > shall be split into fields, according to the rules in Regular Expressions,
1887                 * > using the value of FS that was current at the time the record was read.
1888                 *
1889                 * So, split up current line before assignment to FS:
1890                 */
1891                split_f0();
1892
1893                mk_splitter(getvar_s(v), &fsplitter);
1894        } else if (v == intvar[RS]) {
1895                mk_splitter(getvar_s(v), &rsplitter);
1896        } else if (v == intvar[IGNORECASE]) {
1897                icase = istrue(v);
1898        } else {                                /* $n */
1899                n = getvar_i(intvar[NF]);
1900                setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1901                /* right here v is invalid. Just to note... */
1902        }
1903}
1904
1905/* step through func/builtin/etc arguments */
1906static node *nextarg(node **pn)
1907{
1908        node *n;
1909
1910        n = *pn;
1911        if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1912                *pn = n->r.n;
1913                n = n->l.n;
1914        } else {
1915                *pn = NULL;
1916        }
1917        return n;
1918}
1919
1920static void hashwalk_init(var *v, xhash *array)
1921{
1922        hash_item *hi;
1923        unsigned i;
1924        walker_list *w;
1925        walker_list *prev_walker;
1926
1927        if (v->type & VF_WALK) {
1928                prev_walker = v->x.walker;
1929        } else {
1930                v->type |= VF_WALK;
1931                prev_walker = NULL;
1932        }
1933        debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1934
1935        w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1936        debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1937        w->cur = w->end = w->wbuf;
1938        w->prev = prev_walker;
1939        for (i = 0; i < array->csize; i++) {
1940                hi = array->items[i];
1941                while (hi) {
1942                        strcpy(w->end, hi->name);
1943                        nextword(&w->end);
1944                        hi = hi->next;
1945                }
1946        }
1947}
1948
1949static int hashwalk_next(var *v)
1950{
1951        walker_list *w = v->x.walker;
1952
1953        if (w->cur >= w->end) {
1954                walker_list *prev_walker = w->prev;
1955
1956                debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1957                free(w);
1958                v->x.walker = prev_walker;
1959                return FALSE;
1960        }
1961
1962        setvar_s(v, nextword(&w->cur));
1963        return TRUE;
1964}
1965
1966/* evaluate node, return 1 when result is true, 0 otherwise */
1967static int ptest(node *pattern)
1968{
1969        /* ptest__v is "static": to save stack space? */
1970        return istrue(evaluate(pattern, &G.ptest__v));
1971}
1972
1973/* Read the next record from stream rsm into variable v.
     *
     * Unread input is kept in rsm->buffer: rsm->adv is the offset of the
     * first unread byte and rsm->pos the number of unread bytes after it.
     * The record separator is taken from rsplitter: a compiled regex, a
     * single character, or '\0', in which case a run of two or more
     * newlines ends a record and leading newlines are skipped.
     * On success v receives the record text and RT the separator text that
     * ended it (empty if no separator was found before the input ended).
     *
     * Returns 1 if a record was read, 0 at end of input and -1 on a read
     * error (ERRNO is set from errno in that case).
     */
1974static int awk_getline(rstream *rsm, var *v)
1975{
1976        char *b;
1977        regmatch_t pmatch[2];
1978        int size, a, p, pp = 0;
1979        int fd, so, eo, r, rp;
1980        char c, *m, *s;
1981
1982        debug_printf_eval("entered %s()\n", __func__);
1983
1984        /* we're using our own buffer since we need access to accumulating
1985         * characters
1986         */
1987        fd = fileno(rsm->F);
1988        m = rsm->buffer;
1989        a = rsm->adv;
1990        p = rsm->pos;
1991        size = rsm->size;
1992        c = (char) rsplitter.n.info;
1993        rp = 0;
1994
            /* first use of this stream: allocate the initial buffer */
1995        if (!m)
1996                m = qrealloc(m, 256, &size);
1997
            /* look for a separator in the buffered data; read more and retry
             * until one is found or no more data arrives */
1998        do {
1999                b = m + a;
                    /* default: the record is everything buffered, no separator */
2000                so = eo = p;
2001                r = 1;
2002                if (p > 0) {
2003                        if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
2004                                if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
2005                                                        b, 1, pmatch, 0) == 0) {
2006                                        so = pmatch[0].rm_so;
2007                                        eo = pmatch[0].rm_eo;
                                            /* accept only a match that ends before the end
                                             * of the buffered data; otherwise read more first
                                             * (presumably because more input could extend
                                             * the match) */
2008                                        if (b[eo] != '\0')
2009                                                break;
2010                                }
2011                        } else if (c != '\0') {
                                    /* scan from pp, the data length before the last
                                     * read; if the separator is absent, an embedded
                                     * NUL byte in that range ends the record too */
2012                                s = strchr(b+pp, c);
2013                                if (!s)
2014                                        s = memchr(b+pp, '\0', p - pp);
2015                                if (s) {
2016                                        so = eo = s-b;
2017                                        eo++;
2018                                        break;
2019                                }
2020                        } else {
                                    /* separator char is '\0': skip leading newlines
                                     * (rp becomes the record start), then look for
                                     * a run of at least two newlines */
2021                                while (b[rp] == '\n')
2022                                        rp++;
2023                                s = strstr(b+rp, "\n\n");
2024                                if (s) {
2025                                        so = eo = s-b;
2026                                        while (b[eo] == '\n')
2027                                                eo++;
                                            /* as above: the newline run must end before
                                             * the end of the buffered data */
2028                                        if (b[eo] != '\0')
2029                                                break;
2030                                }
2031                        }
2032                }
2033
                    /* no separator yet: slide the unread data (plus its NUL)
                     * to the start of the buffer to make room */
2034                if (a > 0) {
2035                        memmove(m, m+a, p+1);
2036                        b = m;
2037                        a = 0;
2038                }
2039
2040                m = qrealloc(m, a+p+128, &size);
2041                b = m + a;
2042                pp = p;
2043                p += safe_read(fd, b+p, size-p-1);
                    /* p shrank: safe_read returned a negative value (error) */
2044                if (p < pp) {
2045                        p = 0;
2046                        r = 0;
2047                        setvar_i(intvar[ERRNO], errno);
2048                }
2049                b[p] = '\0';
2050
2051        } while (p > pp);
2052
2053        if (p == 0) {
                    /* nothing buffered: r becomes 0 at end of input, -1 after an error */
2054                r--;
2055        } else {
                    /* temporarily NUL-terminate in place to copy out the record
                     * b[rp..so) and then the separator b[so..eo) */
2056                c = b[so]; b[so] = '\0';
2057                setvar_s(v, b+rp);
2058                v->type |= VF_USER;
2059                b[so] = c;
2060                c = b[eo]; b[eo] = '\0';
2061                setvar_s(intvar[RT], b+so);
2062                b[eo] = c;
2063        }
2064
            /* save the buffer state; the next call starts after the separator */
2065        rsm->buffer = m;
2066        rsm->adv = a + eo;
2067        rsm->pos = p - eo;
2068        rsm->size = size;
2069
2070        debug_printf_eval("returning from %s(): %d\n", __func__, r);
2071
2072        return r;
2073}
2074
2075static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2076{
2077        int r = 0;
2078        char c;
2079        const char *s = format;
2080
2081        if (int_as_int && n == (long long)n) {
2082                r = snprintf(b, size, "%lld", (long long)n);
2083        } else {
2084                do { c = *s; } while (c && *++s);
2085                if (strchr("diouxX", c)) {
2086                        r = snprintf(b, size, format, (int)n);
2087                } else if (strchr("eEfgG", c)) {
2088                        r = snprintf(b, size, format, n);
2089                } else {
2090                        syntax_error(EMSG_INV_FMT);
2091                }
2092        }
2093        return r;
2094}
2095
2096/* formatted output into an allocated buffer, return ptr to buffer */
2097static char *awk_printf(node *n)
2098{
2099        char *b = NULL;
2100        char *fmt, *s, *f;
2101        const char *s1;
2102        int i, j, incr, bsize;
2103        char c, c1;
2104        var *v, *arg;
2105
2106        v = nvalloc(1);
2107        fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2108
2109        i = 0;
2110        while (*f) {
2111                s = f;
2112                while (*f && (*f != '%' || *++f == '%'))
2113                        f++;
2114                while (*f && !isalpha(*f)) {
2115                        if (*f == '*')
2116                                syntax_error("%*x formats are not supported");
2117                        f++;
2118                }
2119
2120                incr = (f - s) + MAXVARFMT;
2121                b = qrealloc(b, incr + i, &bsize);
2122                c = *f;
2123                if (c != '\0')
2124                        f++;
2125                c1 = *f;
2126                *f = '\0';
2127                arg = evaluate(nextarg(&n), v);
2128
2129                j = i;
2130                if (c == 'c' || !c) {
2131                        i += sprintf(b+i, s, is_numeric(arg) ?
2132                                        (char)getvar_i(arg) : *getvar_s(arg));
2133                } else if (c == 's') {
2134                        s1 = getvar_s(arg);
2135                        b = qrealloc(b, incr+i+strlen(s1), &bsize);
2136                        i += sprintf(b+i, s, s1);
2137                } else {
2138                        i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2139                }
2140                *f = c1;
2141
2142                /* if there was an error while sprintf, return value is negative */
2143                if (i < j)
2144                        i = j;
2145        }
2146
2147        free(fmt);
2148        nvfree(v);
2149        b = xrealloc(b, i + 1);
2150        b[i] = '\0';
2151        return b;
2152}
2153
2154/* Common substitution routine.
2155 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2156 * store result into (dest), return number of substitutions.
2157 * If nm = 0, replace all matches.
2158 * If src or dst is NULL, use $0.
2159 * If subexp != 0, enable subexpression matching (\1-\9).
     *
     * More precisely, the return value is the number of matches counted
     * before the loop stopped; it equals the number of replacements made
     * when nm is 0 or 1.
     * In (repl), '&' stands for the whole match and a backslash in front of
     * it makes it literal. With subexp set, a backslash followed by a digit
     * inserts that subexpression, digit 0 selecting the whole match.
     * If (rn) is not a precompiled regexp node, it is compiled into a local
     * regex_t via as_regex() and freed before returning.
2160 */
2161static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2162{
2163        char *resbuf;
2164        const char *sp;
2165        int match_no, residx, replen, resbufsize;
2166        int regexec_flags;
2167        regmatch_t pmatch[10];
2168        regex_t sreg, *regex;
2169
2170        resbuf = NULL;
2171        residx = 0;
2172        match_no = 0;
2173        regexec_flags = 0;
2174        regex = as_regex(rn, &sreg);
2175        sp = getvar_s(src ? src : intvar[F0]);
2176        replen = strlen(repl);
            /* sp walks over the not yet processed part of the source;
             * resbuf[0..residx) is the result built so far */
2177        while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2178                int so = pmatch[0].rm_so;
2179                int eo = pmatch[0].rm_eo;
2180
2181                //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
                    /* copy everything up to the END of the match; if this match
                     * is to be replaced, the matched part is dropped again below */
2182                resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2183                memcpy(resbuf + residx, sp, eo);
2184                residx += eo;
2185                if (++match_no >= nm) {
2186                        const char *s;
2187                        int nbs;
2188
2189                        /* replace */
2190                        residx -= (eo - so);
                            /* nbs counts the backslashes immediately preceding
                             * the current replacement character */
2191                        nbs = 0;
2192                        for (s = repl; *s; s++) {
2193                                char c = resbuf[residx++] = *s;
2194                                if (c == '\\') {
2195                                        nbs++;
2196                                        continue;
2197                                }
2198                                if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2199                                        int j;
                                            /* take back (nbs + 3) / 2 of the characters
                                             * just copied: c itself plus part of the
                                             * backslash run in front of it */
2200                                        residx -= ((nbs + 3) >> 1);
2201                                        j = 0;
2202                                        if (c != '&') {
2203                                                j = c - '0';
                                                    /* a digit needs a backslash to be
                                                     * special, so its parity is shifted */
2204                                                nbs++;
2205                                        }
2206                                        if (nbs % 2) {
                                                    /* odd count: emit c literally */
2207                                                resbuf[residx++] = c;
2208                                        } else {
                                                    /* even count: insert match/subexpression j */
2209                                                int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2210                                                resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2211                                                memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2212                                                residx += n;
2213                                        }
2214                                }
2215                                nbs = 0;
2216                        }
2217                }
2218
                    /* later searches do not start at the beginning of the line */
2219                regexec_flags = REG_NOTBOL;
2220                sp += eo;
                    /* the requested match has been handled: stop searching */
2221                if (match_no == nm)
2222                        break;
2223                if (eo == so) {
2224                        /* Empty match (e.g. "b*" will match anywhere).
2225                         * Advance by one char. */
2226//BUG (bug 1333):
2227//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2228//... and will erroneously match "b" even though it is NOT at the word start.
2229//we need REG_NOTBOW but it does not exist...
2230//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2231//it should be able to do it correctly.
2232                        /* Subtle: this is safe only because
2233                         * qrealloc allocated at least one extra byte */
2234                        resbuf[residx] = *sp;
2235                        if (*sp == '\0')
2236                                goto ret;
2237                        sp++;
2238                        residx++;
2239                }
2240        }
2241
            /* append the remainder of the source after the last match */
2242        resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2243        strcpy(resbuf + residx, sp);
2244 ret:
2245        //bb_error_msg("end sp:'%s'%p", sp,sp);
2246        setvar_p(dest ? dest : intvar[F0], resbuf);
            /* as_regex() compiled into sreg only if rn was not a regexp node */
2247        if (regex == &sreg)
2248                regfree(regex);
2249        return match_no;
2250}
2251
2252static NOINLINE int do_mktime(const char *ds)
2253{
2254        struct tm then;
2255        int count;
2256
2257        /*memset(&then, 0, sizeof(then)); - not needed */
2258        then.tm_isdst = -1; /* default is unknown */
2259
2260        /* manpage of mktime says these fields are ints,
2261         * so we can sscanf stuff directly into them */
2262        count = sscanf(ds, "%u %u %u %u %u %u %d",
2263                &then.tm_year, &then.tm_mon, &then.tm_mday,
2264                &then.tm_hour, &then.tm_min, &then.tm_sec,
2265                &then.tm_isdst);
2266
2267        if (count < 6
2268         || (unsigned)then.tm_mon < 1
2269         || (unsigned)then.tm_year < 1900
2270        ) {
2271                return -1;
2272        }
2273
2274        then.tm_mon -= 1;
2275        then.tm_year -= 1900;
2276
2277        return mktime(&then);
2278}
2279
/*
 * Execute the builtin function described by node op and store its result
 * in res; returns res.
 *
 * op->info carries the builtin's number (OPNMASK), per-argument flag bits
 * that say how each argument is prepared, and in its two top bits the
 * minimum number of arguments. op->l.n is the argument list; at most four
 * arguments are collected:
 *   an[i] - the argument's node,
 *   av[i] - its evaluated value (only if the flag bits request it),
 *   as[i] - its string value    (only if the flag bits request it).
 * Entries beyond nargs, or ones not requested by the flags, are left unset
 * (av[2] and av[3] are preset to NULL).
 */
2280static NOINLINE var *exec_builtin(node *op, var *res)
2281{
2282#define tspl (G.exec_builtin__tspl)
2283
2284        var *tv;
2285        node *an[4];
2286        var *av[4];
2287        const char *as[4];
2288        regmatch_t pmatch[2];
2289        regex_t sreg, *re;
2290        node *spl;
2291        uint32_t isr, info;
2292        int nargs;
2293        time_t tt;
2294        int i, l, ll, n;
2295
            /* temporaries that hold the evaluated arguments */
2296        tv = nvalloc(4);
2297        isr = info = op->info;
2298        op = op->l.n;
2299
2300        av[2] = av[3] = NULL;
            /* isr is shifted right once per argument, so the same two masks
             * test the flag bits of each successive argument */
2301        for (i = 0; i < 4 && op; i++) {
2302                an[i] = nextarg(&op);
2303                if (isr & 0x09000000)
2304                        av[i] = evaluate(an[i], &tv[i]);
2305                if (isr & 0x08000000)
2306                        as[i] = getvar_s(av[i]);
2307                isr >>= 1;
2308        }
2309
2310        nargs = i;
            /* the two top bits of info hold the minimum argument count */
2311        if ((uint32_t)nargs < (info >> 30))
2312                syntax_error(EMSG_TOO_FEW_ARGS);
2313
2314        info &= OPNMASK;
2315        switch (info) {
2316
            /* atan2(av[0], av[1]); needs libm support */
2317        case B_a2:
2318                if (ENABLE_FEATURE_AWK_LIBM)
2319                        setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2320                else
2321                        syntax_error(EMSG_NO_MATH);
2322                break;
2323
            /* split as[0] into array av[1]; result is the number of pieces */
2324        case B_sp: {
2325                char *s, *s1;
2326
                    /* separator: a regexp node is used as is, any other third
                     * argument is turned into a splitter in tspl; with no
                     * third argument the field splitter is used */
2327                if (nargs > 2) {
2328                        spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2329                                an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2330                } else {
2331                        spl = &fsplitter.n;
2332                }
2333
2334                n = awk_split(as[0], spl, &s);
2335                s1 = s;
2336                clear_array(iamarray(av[1]));
                    /* array elements are numbered from 1 */
2337                for (i = 1; i <= n; i++)
2338                        setari_u(av[1], i, nextword(&s));
2339                free(s1);
2340                setvar_i(res, n);
2341                break;
2342        }
2343
            /* substring of as[0] starting at 1-based position av[1],
             * with optional length av[2] */
2344        case B_ss: {
2345                char *s;
2346
2347                l = strlen(as[0]);
                    /* convert the start to a 0-based offset clamped to [0, l] */
2348                i = getvar_i(av[1]) - 1;
2349                if (i > l)
2350                        i = l;
2351                if (i < 0)
2352                        i = 0;
                    /* no length given: take the rest; negative length: empty */
2353                n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2354                if (n < 0)
2355                        n = 0;
2356                s = xstrndup(as[0]+i, n);
2357                setvar_p(res, s);
2358                break;
2359        }
2360
2361        /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2362         * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2363        case B_an:
2364                setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2365                break;
2366
2367        case B_co:
2368                setvar_i(res, ~getvar_i_int(av[0]));
2369                break;
2370
2371        case B_ls:
2372                setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2373                break;
2374
2375        case B_or:
2376                setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2377                break;
2378
2379        case B_rs:
2380                setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2381                break;
2382
2383        case B_xo:
2384                setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2385                break;
2386
            /* lower-case / upper-case a copy of as[0]; only the ASCII
             * letters a-z and A-Z are changed */
2387        case B_lo:
2388        case B_up: {
2389                char *s, *s1;
2390                s1 = s = xstrdup(as[0]);
2391                while (*s1) {
2392                        //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
                            /* (c | 0x20) maps A-Z onto a-z, so one range check
                             * covers both cases; bit 0x20 selects the case */
2393                        if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2394                                *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2395                        s1++;
2396                }
2397                setvar_p(res, s);
2398                break;
2399        }
2400
            /* 1-based position of as[1] within as[0]; 0 if it is not found
             * or if as[1] is empty. Case-insensitive when icase is set. */
2401        case B_ix:
2402                n = 0;
2403                ll = strlen(as[1]);
                    /* l is the last offset at which as[1] could still fit */
2404                l = strlen(as[0]) - ll;
2405                if (ll > 0 && l >= 0) {
2406                        if (!icase) {
2407                                char *s = strstr(as[0], as[1]);
2408                                if (s)
2409                                        n = (s - as[0]) + 1;
2410                        } else {
2411                                /* this piece of code is terribly slow and
2412                                 * really should be rewritten
2413                                 */
2414                                for (i = 0; i <= l; i++) {
2415                                        if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2416                                                n = i+1;
2417                                                break;
2418                                        }
2419                                }
2420                        }
2421                }
2422                setvar_i(res, n);
2423                break;
2424
            /* format a time with strftime(): the timestamp is av[1] if given,
             * else the current time; the format is as[0] if given, else the
             * default below. Output is limited to MAXVARFMT bytes of g_buf. */
2425        case B_ti:
2426                if (nargs > 1)
2427                        tt = getvar_i(av[1]);
2428                else
2429                        time(&tt);
2430                //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2431                i = strftime(g_buf, MAXVARFMT,
2432                        ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2433                        localtime(&tt));
2434                g_buf[i] = '\0';
2435                setvar_s(res, g_buf);
2436                break;
2437
            /* convert the date string as[0] to a timestamp */
2438        case B_mt:
2439                setvar_i(res, do_mktime(as[0]));
2440                break;
2441
            /* match as[0] against regex an[1]: sets RSTART and RLENGTH and
             * returns RSTART. Without a match RSTART is 0 and RLENGTH -1. */
2442        case B_ma:
2443                re = as_regex(an[1], &sreg);
2444                n = regexec(re, as[0], 1, pmatch, 0);
2445                if (n == 0) {
                            /* convert 0-based offsets to 1-based positions */
2446                        pmatch[0].rm_so++;
2447                        pmatch[0].rm_eo++;
2448                } else {
2449                        pmatch[0].rm_so = 0;
2450                        pmatch[0].rm_eo = -1;
2451                }
2452                setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2453                setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2454                setvar_i(res, pmatch[0].rm_so);
                    /* free the regex only if as_regex() compiled it into sreg */
2455                if (re == &sreg)
2456                        regfree(re);
2457                break;
2458
            /* substitution with subexpression support: match number av[2],
             * source av[3] ($0 if NULL); the new string is stored in res */
2459        case B_ge:
2460                awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2461                break;
2462
            /* replace all matches in av[2] ($0 if NULL), in place;
             * the result is the count returned by awk_sub() */
2463        case B_gs:
2464                setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2465                break;
2466
            /* as above, but replace only the first match */
2467        case B_su:
2468                setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2469                break;
2470        }
2471
2472        nvfree(tv);
2473        return res;
2474#undef tspl
2475}
2476
2477/*
2478 * Evaluate node - the heart of the program. Supplied with subtree
2479 * and place where to store result. returns ptr to result.
2480 */
2481#define XC(n) ((n) >> 8)
2482
2483static var *evaluate(node *op, var *res)
2484{
2485/* This procedure is recursive so we should count every byte */
2486#define fnargs (G.evaluate__fnargs)
2487/* seed is initialized to 1 */
2488#define seed   (G.evaluate__seed)
2489#define sreg   (G.evaluate__sreg)
2490
2491        var *v1;
2492
2493        if (!op)
2494                return setvar_s(res, NULL);
2495
2496        debug_printf_eval("entered %s()\n", __func__);
2497
2498        v1 = nvalloc(2);
2499
2500        while (op) {
2501                struct {
2502                        var *v;
2503                        const char *s;
2504                } L = L; /* for compiler */
2505                struct {
2506                        var *v;
2507                        const char *s;
2508                } R = R;
2509                double L_d = L_d;
2510                uint32_t opinfo;
2511                int opn;
2512                node *op1;
2513
2514                opinfo = op->info;
2515                opn = (opinfo & OPNMASK);
2516                g_lineno = op->lineno;
2517                op1 = op->l.n;
2518                debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2519
2520                /* "delete" is special:
2521                 * "delete array[var--]" must evaluate index expr only once,
2522                 * must not evaluate it in "execute inevitable things" part.
2523                 */
2524                if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
2525                        uint32_t info = op1->info & OPCLSMASK;
2526                        var *v;
2527
2528                        debug_printf_eval("DELETE\n");
2529                        if (info == OC_VAR) {
2530                                v = op1->l.v;
2531                        } else if (info == OC_FNARG) {
2532                                v = &fnargs[op1->l.aidx];
2533                        } else {
2534                                syntax_error(EMSG_NOT_ARRAY);
2535                        }
2536                        if (op1->r.n) { /* array ref? */
2537                                const char *s;
2538                                s = getvar_s(evaluate(op1->r.n, v1));
2539                                hash_remove(iamarray(v), s);
2540                        } else {
2541                                clear_array(iamarray(v));
2542                        }
2543                        goto next;
2544                }
2545
2546                /* execute inevitable things */
2547                if (opinfo & OF_RES1)
2548                        L.v = evaluate(op1, v1);
2549                if (opinfo & OF_RES2)
2550                        R.v = evaluate(op->r.n, v1+1);
2551                if (opinfo & OF_STR1) {
2552                        L.s = getvar_s(L.v);
2553                        debug_printf_eval("L.s:'%s'\n", L.s);
2554                }
2555                if (opinfo & OF_STR2) {
2556                        R.s = getvar_s(R.v);
2557                        debug_printf_eval("R.s:'%s'\n", R.s);
2558                }
2559                if (opinfo & OF_NUM1) {
2560                        L_d = getvar_i(L.v);
2561                        debug_printf_eval("L_d:%f\n", L_d);
2562                }
2563
2564                debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2565                switch (XC(opinfo & OPCLSMASK)) {
2566
2567                /* -- iterative node type -- */
2568
2569                /* test pattern */
2570                case XC( OC_TEST ):
2571                        if ((op1->info & OPCLSMASK) == OC_COMMA) {
2572                                /* it's range pattern */
2573                                if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2574                                        op->info |= OF_CHECKED;
2575                                        if (ptest(op1->r.n))
2576                                                op->info &= ~OF_CHECKED;
2577                                        op = op->a.n;
2578                                } else {
2579                                        op = op->r.n;
2580                                }
2581                        } else {
2582                                op = ptest(op1) ? op->a.n : op->r.n;
2583                        }
2584                        break;
2585
2586                /* just evaluate an expression, also used as unconditional jump */
2587                case XC( OC_EXEC ):
2588                        break;
2589
2590                /* branch, used in if-else and various loops */
2591                case XC( OC_BR ):
2592                        op = istrue(L.v) ? op->a.n : op->r.n;
2593                        break;
2594
2595                /* initialize for-in loop */
2596                case XC( OC_WALKINIT ):
2597                        hashwalk_init(L.v, iamarray(R.v));
2598                        break;
2599
2600                /* get next array item */
2601                case XC( OC_WALKNEXT ):
2602                        op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2603                        break;
2604
2605                case XC( OC_PRINT ):
2606                case XC( OC_PRINTF ): {
2607                        FILE *F = stdout;
2608
2609                        if (op->r.n) {
2610                                rstream *rsm = newfile(R.s);
2611                                if (!rsm->F) {
2612                                        if (opn == '|') {
2613                                                rsm->F = popen(R.s, "w");
2614                                                if (rsm->F == NULL)
2615                                                        bb_perror_msg_and_die("popen");
2616                                                rsm->is_pipe = 1;
2617                                        } else {
2618                                                rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2619                                        }
2620                                }
2621                                F = rsm->F;
2622                        }
2623
2624                        if ((opinfo & OPCLSMASK) == OC_PRINT) {
2625                                if (!op1) {
2626                                        fputs(getvar_s(intvar[F0]), F);
2627                                } else {
2628                                        while (op1) {
2629                                                var *v = evaluate(nextarg(&op1), v1);
2630                                                if (v->type & VF_NUMBER) {
2631                                                        fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2632                                                                        getvar_i(v), TRUE);
2633                                                        fputs(g_buf, F);
2634                                                } else {
2635                                                        fputs(getvar_s(v), F);
2636                                                }
2637
2638                                                if (op1)
2639                                                        fputs(getvar_s(intvar[OFS]), F);
2640                                        }
2641                                }
2642                                fputs(getvar_s(intvar[ORS]), F);
2643
2644                        } else {        /* OC_PRINTF */
2645                                char *s = awk_printf(op1);
2646                                fputs(s, F);
2647                                free(s);
2648                        }
2649                        fflush(F);
2650                        break;
2651                }
2652
2653                /* case XC( OC_DELETE ): - moved to happen before arg evaluation */
2654
2655                case XC( OC_NEWSOURCE ):
2656                        g_progname = op->l.new_progname;
2657                        break;
2658
2659                case XC( OC_RETURN ):
2660                        copyvar(res, L.v);
2661                        break;
2662
2663                case XC( OC_NEXTFILE ):
2664                        nextfile = TRUE;
2665                case XC( OC_NEXT ):
2666                        nextrec = TRUE;
2667                case XC( OC_DONE ):
2668                        clrvar(res);
2669                        break;
2670
2671                case XC( OC_EXIT ):
2672                        awk_exit(L_d);
2673
2674                /* -- recursive node type -- */
2675
2676                case XC( OC_VAR ):
2677                        debug_printf_eval("VAR\n");
2678                        L.v = op->l.v;
2679                        if (L.v == intvar[NF])
2680                                split_f0();
2681                        goto v_cont;
2682
2683                case XC( OC_FNARG ):
2684                        debug_printf_eval("FNARG[%d]\n", op->l.aidx);
2685                        L.v = &fnargs[op->l.aidx];
2686 v_cont:
2687                        res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2688                        break;
2689
2690                case XC( OC_IN ):
2691                        setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2692                        break;
2693
2694                case XC( OC_REGEXP ):
2695                        op1 = op;
2696                        L.s = getvar_s(intvar[F0]);
2697                        goto re_cont;
2698
2699                case XC( OC_MATCH ):
2700                        op1 = op->r.n;
2701 re_cont:
2702                        {
2703                                regex_t *re = as_regex(op1, &sreg);
2704                                int i = regexec(re, L.s, 0, NULL, 0);
2705                                if (re == &sreg)
2706                                        regfree(re);
2707                                setvar_i(res, (i == 0) ^ (opn == '!'));
2708                        }
2709                        break;
2710
2711                case XC( OC_MOVE ):
2712                        debug_printf_eval("MOVE\n");
2713                        /* if source is a temporary string, jusk relink it to dest */
2714//Disabled: if R.v is numeric but happens to have cached R.v->string,
2715//then L.v ends up being a string, which is wrong
2716//                      if (R.v == v1+1 && R.v->string) {
2717//                              res = setvar_p(L.v, R.v->string);
2718//                              R.v->string = NULL;
2719//                      } else {
2720                                res = copyvar(L.v, R.v);
2721//                      }
2722                        break;
2723
2724                case XC( OC_TERNARY ):
2725                        if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2726                                syntax_error(EMSG_POSSIBLE_ERROR);
2727                        res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2728                        break;
2729
2730                case XC( OC_FUNC ): {
2731                        var *vbeg, *v;
2732                        const char *sv_progname;
2733
2734                        /* The body might be empty, still has to eval the args */
2735                        if (!op->r.n->info && !op->r.f->body.first)
2736                                syntax_error(EMSG_UNDEF_FUNC);
2737
2738                        vbeg = v = nvalloc(op->r.f->nargs + 1);
2739                        while (op1) {
2740                                var *arg = evaluate(nextarg(&op1), v1);
2741                                copyvar(v, arg);
2742                                v->type |= VF_CHILD;
2743                                v->x.parent = arg;
2744                                if (++v - vbeg >= op->r.f->nargs)
2745                                        break;
2746                        }
2747
2748                        v = fnargs;
2749                        fnargs = vbeg;
2750                        sv_progname = g_progname;
2751
2752                        res = evaluate(op->r.f->body.first, res);
2753
2754                        g_progname = sv_progname;
2755                        nvfree(fnargs);
2756                        fnargs = v;
2757
2758                        break;
2759                }
2760
2761                case XC( OC_GETLINE ):
2762                case XC( OC_PGETLINE ): {
2763                        rstream *rsm;
2764                        int i;
2765
2766                        if (op1) {
2767                                rsm = newfile(L.s);
2768                                if (!rsm->F) {
2769                                        if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2770                                                rsm->F = popen(L.s, "r");
2771                                                rsm->is_pipe = TRUE;
2772                                        } else {
2773                                                rsm->F = fopen_for_read(L.s);  /* not xfopen! */
2774                                        }
2775                                }
2776                        } else {
2777                                if (!iF)
2778                                        iF = next_input_file();
2779                                rsm = iF;
2780                        }
2781
2782                        if (!rsm || !rsm->F) {
2783                                setvar_i(intvar[ERRNO], errno);
2784                                setvar_i(res, -1);
2785                                break;
2786                        }
2787
2788                        if (!op->r.n)
2789                                R.v = intvar[F0];
2790
2791                        i = awk_getline(rsm, R.v);
2792                        if (i > 0 && !op1) {
2793                                incvar(intvar[FNR]);
2794                                incvar(intvar[NR]);
2795                        }
2796                        setvar_i(res, i);
2797                        break;
2798                }
2799
2800                /* simple builtins */
2801                case XC( OC_FBLTIN ): {
2802                        double R_d = R_d; /* for compiler */
2803
2804                        switch (opn) {
2805                        case F_in:
2806                                R_d = (long long)L_d;
2807                                break;
2808
2809                        case F_rn:
2810                                R_d = (double)rand() / (double)RAND_MAX;
2811                                break;
2812
2813                        case F_co:
2814                                if (ENABLE_FEATURE_AWK_LIBM) {
2815                                        R_d = cos(L_d);
2816                                        break;
2817                                }
2818
2819                        case F_ex:
2820                                if (ENABLE_FEATURE_AWK_LIBM) {
2821                                        R_d = exp(L_d);
2822                                        break;
2823                                }
2824
2825                        case F_lg:
2826                                if (ENABLE_FEATURE_AWK_LIBM) {
2827                                        R_d = log(L_d);
2828                                        break;
2829                                }
2830
2831                        case F_si:
2832                                if (ENABLE_FEATURE_AWK_LIBM) {
2833                                        R_d = sin(L_d);
2834                                        break;
2835                                }
2836
2837                        case F_sq:
2838                                if (ENABLE_FEATURE_AWK_LIBM) {
2839                                        R_d = sqrt(L_d);
2840                                        break;
2841                                }
2842
2843                                syntax_error(EMSG_NO_MATH);
2844                                break;
2845
2846                        case F_sr:
2847                                R_d = (double)seed;
2848                                seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2849                                srand(seed);
2850                                break;
2851
2852                        case F_ti:
2853                                R_d = time(NULL);
2854                                break;
2855
2856                        case F_le:
2857                                debug_printf_eval("length: L.s:'%s'\n", L.s);
2858                                if (!op1) {
2859                                        L.s = getvar_s(intvar[F0]);
2860                                        debug_printf_eval("length: L.s='%s'\n", L.s);
2861                                }
2862                                else if (L.v->type & VF_ARRAY) {
2863                                        R_d = L.v->x.array->nel;
2864                                        debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2865                                        break;
2866                                }
2867                                R_d = strlen(L.s);
2868                                break;
2869
2870                        case F_sy:
2871                                fflush_all();
2872                                R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2873                                                ? (system(L.s) >> 8) : 0;
2874                                break;
2875
2876                        case F_ff:
2877                                if (!op1) {
2878                                        fflush(stdout);
2879                                } else if (L.s && *L.s) {
2880                                        rstream *rsm = newfile(L.s);
2881                                        fflush(rsm->F);
2882                                } else {
2883                                        fflush_all();
2884                                }
2885                                break;
2886
2887                        case F_cl: {
2888                                rstream *rsm;
2889                                int err = 0;
2890                                rsm = (rstream *)hash_search(fdhash, L.s);
2891                                debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2892                                if (rsm) {
2893                                        debug_printf_eval("OC_FBLTIN F_cl "
2894                                                "rsm->is_pipe:%d, ->F:%p\n",
2895                                                rsm->is_pipe, rsm->F);
2896                                        /* Can be NULL if open failed. Example:
2897                                         * getline line <"doesnt_exist";
2898                                         * close("doesnt_exist"); <--- here rsm->F is NULL
2899                                         */
2900                                        if (rsm->F)
2901                                                err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2902                                        free(rsm->buffer);
2903                                        hash_remove(fdhash, L.s);
2904                                }
2905                                if (err)
2906                                        setvar_i(intvar[ERRNO], errno);
2907                                R_d = (double)err;
2908                                break;
2909                        }
2910                        } /* switch */
2911                        setvar_i(res, R_d);
2912                        break;
2913                }
2914
2915                case XC( OC_BUILTIN ):
2916                        res = exec_builtin(op, res);
2917                        break;
2918
2919                case XC( OC_SPRINTF ):
2920                        setvar_p(res, awk_printf(op1));
2921                        break;
2922
2923                case XC( OC_UNARY ): {
2924                        double Ld, R_d;
2925
2926                        Ld = R_d = getvar_i(R.v);
2927                        switch (opn) {
2928                        case 'P':
2929                                Ld = ++R_d;
2930                                goto r_op_change;
2931                        case 'p':
2932                                R_d++;
2933                                goto r_op_change;
2934                        case 'M':
2935                                Ld = --R_d;
2936                                goto r_op_change;
2937                        case 'm':
2938                                R_d--;
2939 r_op_change:
2940                                setvar_i(R.v, R_d);
2941                                break;
2942                        case '!':
2943                                Ld = !istrue(R.v);
2944                                break;
2945                        case '-':
2946                                Ld = -R_d;
2947                                break;
2948                        }
2949                        setvar_i(res, Ld);
2950                        break;
2951                }
2952
2953                case XC( OC_FIELD ): {
2954                        int i = (int)getvar_i(R.v);
2955                        if (i < 0)
2956                                syntax_error(EMSG_NEGATIVE_FIELD);
2957                        if (i == 0) {
2958                                res = intvar[F0];
2959                        } else {
2960                                split_f0();
2961                                if (i > nfields)
2962                                        fsrealloc(i);
2963                                res = &Fields[i - 1];
2964                        }
2965                        break;
2966                }
2967
2968                /* concatenation (" ") and index joining (",") */
2969                case XC( OC_CONCAT ):
2970                case XC( OC_COMMA ): {
2971                        const char *sep = "";
2972                        if ((opinfo & OPCLSMASK) == OC_COMMA)
2973                                sep = getvar_s(intvar[SUBSEP]);
2974                        setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2975                        break;
2976                }
2977
2978                case XC( OC_LAND ):
2979                        setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2980                        break;
2981
2982                case XC( OC_LOR ):
2983                        setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2984                        break;
2985
2986                case XC( OC_BINARY ):
2987                case XC( OC_REPLACE ): {
2988                        double R_d = getvar_i(R.v);
2989                        debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2990                        switch (opn) {
2991                        case '+':
2992                                L_d += R_d;
2993                                break;
2994                        case '-':
2995                                L_d -= R_d;
2996                                break;
2997                        case '*':
2998                                L_d *= R_d;
2999                                break;
3000                        case '/':
3001                                if (R_d == 0)
3002                                        syntax_error(EMSG_DIV_BY_ZERO);
3003                                L_d /= R_d;
3004                                break;
3005                        case '&':
3006                                if (ENABLE_FEATURE_AWK_LIBM)
3007                                        L_d = pow(L_d, R_d);
3008                                else
3009                                        syntax_error(EMSG_NO_MATH);
3010                                break;
3011                        case '%':
3012                                if (R_d == 0)
3013                                        syntax_error(EMSG_DIV_BY_ZERO);
3014                                L_d -= (long long)(L_d / R_d) * R_d;
3015                                break;
3016                        }
3017                        debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3018                        res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3019                        break;
3020                }
3021
3022                case XC( OC_COMPARE ): {
3023                        int i = i; /* for compiler */
3024                        double Ld;
3025
3026                        if (is_numeric(L.v) && is_numeric(R.v)) {
3027                                Ld = getvar_i(L.v) - getvar_i(R.v);
3028                        } else {
3029                                const char *l = getvar_s(L.v);
3030                                const char *r = getvar_s(R.v);
3031                                Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3032                        }
3033                        switch (opn & 0xfe) {
3034                        case 0:
3035                                i = (Ld > 0);
3036                                break;
3037                        case 2:
3038                                i = (Ld >= 0);
3039                                break;
3040                        case 4:
3041                                i = (Ld == 0);
3042                                break;
3043                        }
3044                        setvar_i(res, (i == 0) ^ (opn & 1));
3045                        break;
3046                }
3047
3048                default:
3049                        syntax_error(EMSG_POSSIBLE_ERROR);
3050                } /* switch */
3051 next:
3052                if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3053                        op = op->a.n;
3054                if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3055                        break;
3056                if (nextrec)
3057                        break;
3058        } /* while (op) */
3059
3060        nvfree(v1);
3061        debug_printf_eval("returning from %s(): %p\n", __func__, res);
3062        return res;
3063#undef fnargs
3064#undef seed
3065#undef sreg
3066}
3067
3068
3069/* -------- main & co. -------- */
3070
3071static int awk_exit(int r)
3072{
3073        var tv;
3074        unsigned i;
3075        hash_item *hi;
3076
3077        zero_out_var(&tv);
3078
3079        if (!exiting) {
3080                exiting = TRUE;
3081                nextrec = FALSE;
3082                evaluate(endseq.first, &tv);
3083        }
3084
3085        /* waiting for children */
3086        for (i = 0; i < fdhash->csize; i++) {
3087                hi = fdhash->items[i];
3088                while (hi) {
3089                        if (hi->data.rs.F && hi->data.rs.is_pipe)
3090                                pclose(hi->data.rs.F);
3091                        hi = hi->next;
3092                }
3093        }
3094
3095        exit(r);
3096}
3097
3098/* if expr looks like "var=value", perform assignment and return 1,
3099 * otherwise return 0 */
3100static int is_assignment(const char *expr)
3101{
3102        char *exprc, *val;
3103
3104        if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3105                return FALSE;
3106        }
3107
3108        exprc = xstrdup(expr);
3109        val = exprc + (val - expr);
3110        *val++ = '\0';
3111
3112        unescape_string_in_place(val);
3113        setvar_u(newvar(exprc), val);
3114        free(exprc);
3115        return TRUE;
3116}
3117
3118/* switch to next input file */
3119static rstream *next_input_file(void)
3120{
3121#define rsm          (G.next_input_file__rsm)
3122#define files_happen (G.next_input_file__files_happen)
3123
3124        FILE *F;
3125        const char *fname, *ind;
3126
3127        if (rsm.F)
3128                fclose(rsm.F);
3129        rsm.F = NULL;
3130        rsm.pos = rsm.adv = 0;
3131
3132        for (;;) {
3133                if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3134                        if (files_happen)
3135                                return NULL;
3136                        fname = "-";
3137                        F = stdin;
3138                        break;
3139                }
3140                ind = getvar_s(incvar(intvar[ARGIND]));
3141                fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3142                if (fname && *fname && !is_assignment(fname)) {
3143                        F = xfopen_stdin(fname);
3144                        break;
3145                }
3146        }
3147
3148        files_happen = TRUE;
3149        setvar_s(intvar[FILENAME], fname);
3150        rsm.F = F;
3151        return &rsm;
3152#undef rsm
3153#undef files_happen
3154}
3155
3156int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3157int awk_main(int argc UNUSED_PARAM, char **argv)
3158{
3159        unsigned opt;
3160        char *opt_F;
3161        llist_t *list_v = NULL;
3162        llist_t *list_f = NULL;
3163#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3164        llist_t *list_e = NULL;
3165#endif
3166        int i, j;
3167        var *v;
3168        var tv;
3169        char **envp;
3170        char *vnames = (char *)vNames; /* cheat */
3171        char *vvalues = (char *)vValues;
3172
3173        INIT_G();
3174
3175        /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3176         * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3177        if (ENABLE_LOCALE_SUPPORT)
3178                setlocale(LC_NUMERIC, "C");
3179
3180        zero_out_var(&tv);
3181
3182        /* allocate global buffer */
3183        g_buf = xmalloc(MAXVARFMT + 1);
3184
3185        vhash = hash_init();
3186        ahash = hash_init();
3187        fdhash = hash_init();
3188        fnhash = hash_init();
3189
3190        /* initialize variables */
3191        for (i = 0; *vnames; i++) {
3192                intvar[i] = v = newvar(nextword(&vnames));
3193                if (*vvalues != '\377')
3194                        setvar_s(v, nextword(&vvalues));
3195                else
3196                        setvar_i(v, 0);
3197
3198                if (*vnames == '*') {
3199                        v->type |= VF_SPECIAL;
3200                        vnames++;
3201                }
3202        }
3203
3204        handle_special(intvar[FS]);
3205        handle_special(intvar[RS]);
3206
3207        newfile("/dev/stdin")->F = stdin;
3208        newfile("/dev/stdout")->F = stdout;
3209        newfile("/dev/stderr")->F = stderr;
3210
3211        /* Huh, people report that sometimes environ is NULL. Oh well. */
3212        if (environ) for (envp = environ; *envp; envp++) {
3213                /* environ is writable, thus we don't strdup it needlessly */
3214                char *s = *envp;
3215                char *s1 = strchr(s, '=');
3216                if (s1) {
3217                        *s1 = '\0';
3218                        /* Both findvar and setvar_u take const char*
3219                         * as 2nd arg -> environment is not trashed */
3220                        setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3221                        *s1 = '=';
3222                }
3223        }
3224        opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3225        argv += optind;
3226        //argc -= optind;
3227        if (opt & OPT_W)
3228                bb_error_msg("warning: option -W is ignored");
3229        if (opt & OPT_F) {
3230                unescape_string_in_place(opt_F);
3231                setvar_s(intvar[FS], opt_F);
3232        }
3233        while (list_v) {
3234                if (!is_assignment(llist_pop(&list_v)))
3235                        bb_show_usage();
3236        }
3237        while (list_f) {
3238                char *s = NULL;
3239                FILE *from_file;
3240
3241                g_progname = llist_pop(&list_f);
3242                from_file = xfopen_stdin(g_progname);
3243                /* one byte is reserved for some trick in next_token */
3244                for (i = j = 1; j > 0; i += j) {
3245                        s = xrealloc(s, i + 4096);
3246                        j = fread(s + i, 1, 4094, from_file);
3247                }
3248                s[i] = '\0';
3249                fclose(from_file);
3250                parse_program(s + 1);
3251                free(s);
3252        }
3253        g_progname = "cmd. line";
3254#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3255        while (list_e) {
3256                parse_program(llist_pop(&list_e));
3257        }
3258#endif
3259        if (!(opt & (OPT_f | OPT_e))) {
3260                if (!*argv)
3261                        bb_show_usage();
3262                parse_program(*argv++);
3263        }
3264
3265        /* fill in ARGV array */
3266        setari_u(intvar[ARGV], 0, "awk");
3267        i = 0;
3268        while (*argv)
3269                setari_u(intvar[ARGV], ++i, *argv++);
3270        setvar_i(intvar[ARGC], i + 1);
3271
3272        evaluate(beginseq.first, &tv);
3273        if (!mainseq.first && !endseq.first)
3274                awk_exit(EXIT_SUCCESS);
3275
3276        /* input file could already be opened in BEGIN block */
3277        if (!iF)
3278                iF = next_input_file();
3279
3280        /* passing through input files */
3281        while (iF) {
3282                nextfile = FALSE;
3283                setvar_i(intvar[FNR], 0);
3284
3285                while ((i = awk_getline(iF, intvar[F0])) > 0) {
3286                        nextrec = FALSE;
3287                        incvar(intvar[NR]);
3288                        incvar(intvar[FNR]);
3289                        evaluate(mainseq.first, &tv);
3290
3291                        if (nextfile)
3292                                break;
3293                }
3294
3295                if (i < 0)
3296                        syntax_error(strerror(errno));
3297
3298                iF = next_input_file();
3299        }
3300
3301        awk_exit(EXIT_SUCCESS);
3302        /*return 0;*/
3303}
3304