1
2
3
4
5
6
7
8
9
10#include "libbb.h"
11#include "xregex.h"
12#include <math.h>
13
14
15
16
/* Size passed to fmt_num() when a number is formatted into g_buf */
#define MAXVARFMT       240
/* Smallest number of vars allocated per nvblock by nvalloc() */
#define MINNVBLOCK      64

/* var.type flag bits: value kind */
#define VF_NUMBER       0x0001  /* set by setvar_i(): 'number' holds the value */
#define VF_ARRAY        0x0002  /* set by iamarray(): x.array is valid */

/* var.type flag bits: state */
#define VF_CACHED       0x0100  /* string<->number conversion already stored */
#define VF_USER         0x0200  /* set by setvar_u() */
#define VF_SPECIAL      0x0400  /* set on Fields[] entries by fsrealloc() */
#define VF_WALK         0x0800  /* x.walker is allocated (freed in nvfree()) */
#define VF_FSTR         0x1000  /* 'string' is not owned: clrvar() won't free it */
#define VF_CHILD        0x2000  /* x.parent points at the real variable */
#define VF_DIRTY        0x4000  /* set by clrvar() on every reset */

/* Flags that clrvar() preserves when it resets a variable */
#define VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35
/* A single awk value: variable, array element, field or temporary */
typedef struct var_s {
	unsigned type;                  /* VF_xxx flag bits */
	double number;                  /* numeric value (or cached conversion) */
	char *string;                   /* string value (or cached conversion); may be NULL */
	union {
		int aidx;               /* argument index (set while parsing a function header) */
		struct xhash_s *array;  /* element table, valid with VF_ARRAY */
		struct var_s *parent;   /* real variable, valid with VF_CHILD */
		char **walker;          /* freed by nvfree() when VF_WALK is set */
	} x;
} var;
47
48
/* A sequence of nodes built by chain_node() */
typedef struct chain_s {
	struct node_s *first;           /* head of the sequence */
	struct node_s *last;            /* current tail (next node to be filled in) */
	const char *programname;        /* g_progname last recorded for this chain */
} chain;
54
55
56typedef struct func_s {
57 unsigned nargs;
58 struct chain_s body;
59} func;
60
61
62typedef struct rstream_s {
63 FILE *F;
64 char *buffer;
65 int adv;
66 int size;
67 int pos;
68 smallint is_pipe;
69} rstream;
70
71typedef struct hash_item_s {
72 union {
73 struct var_s v;
74 struct rstream_s rs;
75 struct func_s f;
76 } data;
77 struct hash_item_s *next;
78 char name[1];
79} hash_item;
80
/* Chained hash table */
typedef struct xhash_s {
	unsigned nel;                   /* number of stored entries */
	unsigned csize;                 /* number of buckets */
	unsigned nprime;                /* index into PRIMES[] of the next size */
	unsigned glen;                  /* sum of (strlen(name) + 1) over all entries */
	struct hash_item_s **items;     /* bucket heads */
} xhash;
88
89
90typedef struct node_s {
91 uint32_t info;
92 unsigned lineno;
93 union {
94 struct node_s *n;
95 var *v;
96 int i;
97 char *s;
98 regex_t *re;
99 } l;
100 union {
101 struct node_s *n;
102 regex_t *ire;
103 func *f;
104 int argno;
105 } r;
106 union {
107 struct node_s *n;
108 } a;
109} node;
110
111
112typedef struct nvblock_s {
113 int size;
114 var *pos;
115 struct nvblock_s *prev;
116 struct nvblock_s *next;
117 var nv[0];
118} nvblock;
119
120typedef struct tsplitter_s {
121 node n;
122 regex_t re[2];
123} tsplitter;
124
125
126
/*
 * Token classes. The bit order must match the order of the NTC-separated
 * groups in tokenlist[]: next_token() starts at bit 0 and shifts left at
 * every group separator.
 */
#define TC_SEQSTART     1               /* ( */
#define TC_SEQTERM      (1 << 1)        /* ) */
#define TC_REGEXP       (1 << 2)        /* /.../ */
#define TC_OUTRDR       (1 << 3)        /* >> > | (output redirection) */
#define TC_UOPPOST      (1 << 4)        /* postfix ++ -- */
#define TC_UOPPRE1      (1 << 5)        /* prefix ++ -- $ */
#define TC_BINOPX       (1 << 6)        /* binary operators */
#define TC_IN           (1 << 7)
#define TC_COMMA        (1 << 8)
#define TC_PIPE         (1 << 9)
#define TC_UOPPRE2      (1 << 10)       /* unary + - ! */
#define TC_ARRTERM      (1 << 11)       /* ] */
#define TC_GRPSTART     (1 << 12)       /* { */
#define TC_GRPTERM      (1 << 13)       /* } */
#define TC_SEMICOL      (1 << 14)
#define TC_NEWLINE      (1 << 15)
#define TC_STATX        (1 << 16)       /* statement keywords except 'while' */
#define TC_WHILE        (1 << 17)
#define TC_ELSE         (1 << 18)
#define TC_BUILTIN      (1 << 19)
#define TC_GETLINE      (1 << 20)
#define TC_FUNCDECL     (1 << 21)       /* func / function */
#define TC_BEGIN        (1 << 22)
#define TC_END          (1 << 23)
#define TC_EOF          (1 << 24)
#define TC_VARIABLE     (1 << 25)
#define TC_ARRAY        (1 << 26)
#define TC_FUNCTION     (1 << 27)
#define TC_STRING       (1 << 28)
#define TC_NUMBER       (1 << 29)

#define TC_UOPPRE  (TC_UOPPRE1 | TC_UOPPRE2)

/* Combined classes */
#define TC_BINOP   (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
                   | TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM  (TC_SEMICOL | TC_NEWLINE)

/* Tokens spelled as words: next_token() refuses to match them as a
 * prefix of a longer identifier */
#define TC_WORD    (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
                   | TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* After one of these, next_token() skips a following newline */
#define TC_NOTERM  (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
                   | TC_BINOP | TC_OPTERM)

/* Tokens that may start an expression */
#define TC_OPSEQ   (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* Tokens that may start a statement group */
#define TC_GRPSEQ  (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* next_token() inserts an implicit concatenation operator when the
 * previous token is in TC_CONCAT1 and the new one is in TC_CONCAT2 */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
                   | TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operand handling flags stored in node.info / tokeninfo[] */
#define OF_RES1     0x010000
#define OF_RES2     0x020000
#define OF_STR1     0x040000
#define OF_STR2     0x080000
#define OF_NUM1     0x100000
#define OF_CHECKED  0x200000

/* Two-letter shorthands used in tokeninfo[]: first letter describes the
 * OF_xxx1 bits, second letter the OF_xxx2 bits ('x' = none set) */
#define xx  0
#define xV  OF_RES2
#define xS  (OF_RES2 | OF_STR2)
#define Vx  OF_RES1
#define VV  (OF_RES1 | OF_RES2)
#define Nx  (OF_RES1 | OF_NUM1)
#define NV  (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx  (OF_RES1 | OF_STR1)
#define SV  (OF_RES1 | OF_STR1 | OF_RES2)
#define SS  (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
206
/* node.info layout: opcode class and opcode-specific low bits */
#define OPCLSMASK 0xFF00
#define OPNMASK   0x007F

/*
 * Bits 24..31 of node.info (operator priority for parsed operators;
 * tokeninfo[] also stores other per-builtin data there).
 * The argument is parenthesized and widened to uint32_t before shifting:
 * values such as 0x83 would otherwise be shifted into the sign bit of a
 * signed int, which is undefined behaviour.
 */
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000

/* Opcode class boundaries.
 * NOTE(review): the code using these two is outside this chunk. */
#define SHIFT_TIL_THIS  0x0600
#define RECUR_FROM_THIS 0x1000
222
/* Opcode classes (compare with node.info & OPCLSMASK) */
enum {
	OC_DELETE   = 0x0100,
	OC_EXEC     = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT    = 0x0400,
	OC_PRINTF   = 0x0500,
	OC_WALKINIT = 0x0600,

	OC_BR       = 0x0700,
	OC_BREAK    = 0x0800,
	OC_CONTINUE = 0x0900,
	OC_EXIT     = 0x0a00,
	OC_NEXT     = 0x0b00,
	OC_NEXTFILE = 0x0c00,
	OC_TEST     = 0x0d00,
	OC_WALKNEXT = 0x0e00,

	OC_BINARY   = 0x1000,
	OC_BUILTIN  = 0x1100,
	OC_COLON    = 0x1200,
	OC_COMMA    = 0x1300,
	OC_COMPARE  = 0x1400,
	OC_CONCAT   = 0x1500,
	OC_FBLTIN   = 0x1600,
	OC_FIELD    = 0x1700,
	OC_FNARG    = 0x1800,
	OC_FUNC     = 0x1900,
	OC_GETLINE  = 0x1a00,
	OC_IN       = 0x1b00,
	OC_LAND     = 0x1c00,
	OC_LOR      = 0x1d00,
	OC_MATCH    = 0x1e00,
	OC_MOVE     = 0x1f00,
	OC_PGETLINE = 0x2000,
	OC_REGEXP   = 0x2100,
	OC_REPLACE  = 0x2200,
	OC_RETURN   = 0x2300,
	OC_SPRINTF  = 0x2400,
	OC_TERNARY  = 0x2500,
	OC_UNARY    = 0x2600,
	OC_VAR      = 0x2700,
	OC_DONE     = 0x2800,

	/* Statement keywords, handled directly by chain_group() */
	ST_IF       = 0x3000,
	ST_DO       = 0x3100,
	ST_FOR      = 0x3200,
	ST_WHILE    = 0x3300
};

/* Sub-codes combined with OC_FBLTIN in tokeninfo[] */
enum {
	F_in, F_rn, F_co, F_ex,
	F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff,
	F_cl
};

/* Sub-codes combined with OC_BUILTIN in tokeninfo[] */
enum {
	B_a2, B_ix, B_ma, B_sp,
	B_ss, B_ti, B_lo, B_up,
	B_ge, B_gs, B_su,
	B_an, B_co, B_ls, B_or,
	B_rs, B_xo,
};
257
258
259
260#define NTC "\377"
261#define NTCC '\377'
262
263#define OC_B OC_BUILTIN
264
265static const char tokenlist[] ALIGN1 =
266 "\1(" NTC
267 "\1)" NTC
268 "\1/" NTC
269 "\2>>" "\1>" "\1|" NTC
270 "\2++" "\2--" NTC
271 "\2++" "\2--" "\1$" NTC
272 "\2==" "\1=" "\2+=" "\2-="
273 "\2*=" "\2/=" "\2%=" "\2^="
274 "\1+" "\1-" "\3**=" "\2**"
275 "\1/" "\1%" "\1^" "\1*"
276 "\2!=" "\2>=" "\2<=" "\1>"
277 "\1<" "\2!~" "\1~" "\2&&"
278 "\2||" "\1?" "\1:" NTC
279 "\2in" NTC
280 "\1," NTC
281 "\1|" NTC
282 "\1+" "\1-" "\1!" NTC
283 "\1]" NTC
284 "\1{" NTC
285 "\1}" NTC
286 "\1;" NTC
287 "\1\n" NTC
288 "\2if" "\2do" "\3for" "\5break"
289 "\10continue" "\6delete" "\5print"
290 "\6printf" "\4next" "\10nextfile"
291 "\6return" "\4exit" NTC
292 "\5while" NTC
293 "\4else" NTC
294
295 "\3and" "\5compl" "\6lshift" "\2or"
296 "\6rshift" "\3xor"
297 "\5close" "\6system" "\6fflush" "\5atan2"
298 "\3cos" "\3exp" "\3int" "\3log"
299 "\4rand" "\3sin" "\4sqrt" "\5srand"
300 "\6gensub" "\4gsub" "\5index" "\6length"
301 "\5match" "\5split" "\7sprintf" "\3sub"
302 "\6substr" "\7systime" "\10strftime"
303 "\7tolower" "\7toupper" NTC
304 "\7getline" NTC
305 "\4func" "\10function" NTC
306 "\5BEGIN" NTC
307 "\3END" "\0"
308 ;
309
310static const uint32_t tokeninfo[] = {
311 0,
312 0,
313 OC_REGEXP,
314 xS|'a', xS|'w', xS|'|',
315 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
316 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
317 OC_FIELD|xV|P(5),
318 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
319 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
320 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
321 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
322 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
323 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
324 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
325 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
326 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
327 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
328 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
329 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
330 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
331 OC_COLON|xx|P(67)|':',
332 OC_IN|SV|P(49),
333 OC_COMMA|SS|P(80),
334 OC_PGETLINE|SV|P(37),
335 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
336 OC_UNARY|xV|P(19)|'!',
337 0,
338 0,
339 0,
340 0,
341 0,
342 ST_IF, ST_DO, ST_FOR, OC_BREAK,
343 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
344 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
345 OC_RETURN|Vx, OC_EXIT|Nx,
346 ST_WHILE,
347 0,
348
349 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
350 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
351 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
352 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
353 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
354 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
355 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
356 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
357 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
358 OC_GETLINE|SV|P(0),
359 0, 0,
360 0,
361 0
362};
363
364
365
/* Indexes into intvar[]; the order matches the names in vNames[] */
enum {
	CONVFMT,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	F0,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	NUM_INTERNAL_VARS
};
373
374static const char vNames[] ALIGN1 =
375 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
376 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
377 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
378 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
379 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
380
381static const char vValues[] ALIGN1 =
382 "%.6g\0" "%.6g\0" " \0" " \0"
383 "\n\0" "\n\0" "\0" "\0"
384 "\034\0" "\0" "\377";
385
386
387#define FIRST_PRIME 61
388static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
389
390
391
392
393
394
395
396struct globals {
397 double t_double;
398 chain beginseq, mainseq, endseq;
399 chain *seq;
400 node *break_ptr, *continue_ptr;
401 rstream *iF;
402 xhash *vhash, *ahash, *fdhash, *fnhash;
403 const char *g_progname;
404 int g_lineno;
405 int nfields;
406 int maxfields;
407 var *Fields;
408 nvblock *g_cb;
409 char *g_pos;
410 char *g_buf;
411 smallint icase;
412 smallint exiting;
413 smallint nextrec;
414 smallint nextfile;
415 smallint is_f0_split;
416};
417struct globals2 {
418 uint32_t t_info;
419 uint32_t t_tclass;
420 char *t_string;
421 int t_lineno;
422 int t_rollback;
423
424 var *intvar[NUM_INTERNAL_VARS];
425
426
427 char *split_f0__fstrings;
428
429 uint32_t next_token__save_tclass;
430 uint32_t next_token__save_info;
431 uint32_t next_token__ltclass;
432 smallint next_token__concat_inserted;
433
434 smallint next_input_file__files_happen;
435 rstream next_input_file__rsm;
436
437 var *evaluate__fnargs;
438 unsigned evaluate__seed;
439 regex_t evaluate__sreg;
440
441 var ptest__v;
442
443 tsplitter exec_builtin__tspl;
444
445
446 tsplitter fsplitter, rsplitter;
447};
/*
 * INIT_G() allocates both structs in one zeroed region and passes the
 * address just past the first one to SET_PTR_TO_GLOBALS(), so struct
 * globals is reached at index -1 and struct globals2 at the pointer itself.
 */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)

/* Short names for the members of struct globals */
#define t_double        (G1.t_double)
#define beginseq        (G1.beginseq)
#define mainseq         (G1.mainseq)
#define endseq          (G1.endseq)
#define seq             (G1.seq)
#define break_ptr       (G1.break_ptr)
#define continue_ptr    (G1.continue_ptr)
#define iF              (G1.iF)
#define vhash           (G1.vhash)
#define ahash           (G1.ahash)
#define fdhash          (G1.fdhash)
#define fnhash          (G1.fnhash)
#define g_progname      (G1.g_progname)
#define g_lineno        (G1.g_lineno)
#define nfields         (G1.nfields)
#define maxfields       (G1.maxfields)
#define Fields          (G1.Fields)
#define g_cb            (G1.g_cb)
#define g_pos           (G1.g_pos)
#define g_buf           (G1.g_buf)
#define icase           (G1.icase)
#define exiting         (G1.exiting)
#define nextrec         (G1.nextrec)
#define nextfile        (G1.nextfile)
#define is_f0_split     (G1.is_f0_split)
/* Short names for the members of struct globals2 */
#define t_info          (G.t_info)
#define t_tclass        (G.t_tclass)
#define t_string        (G.t_string)
#define t_lineno        (G.t_lineno)
#define t_rollback      (G.t_rollback)
#define intvar          (G.intvar)
#define fsplitter       (G.fsplitter)
#define rsplitter       (G.rsplitter)
/* Besides allocating, start the scanner as if an operator terminator had
 * just been read and seed evaluate__seed with 1 */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
	G.next_token__ltclass = TC_OPTERM; \
	G.evaluate__seed = 1; \
} while (0)
493
494
495
496static void handle_special(var *);
497static node *parse_expr(uint32_t);
498static void chain_group(void);
499static var *evaluate(node *, var *);
500static rstream *next_input_file(void);
501static int fmt_num(char *, int, const char *, double, int);
502static int awk_exit(int) NORETURN;
503
504
505
506static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
507static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
508static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
509static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
510static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
511static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
512static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
513static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
514static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
515#if !ENABLE_FEATURE_AWK_LIBM
516static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
517#endif
518
519static void zero_out_var(var * vp)
520{
521 memset(vp, 0, sizeof(*vp));
522}
523
524static void syntax_error(const char *const message) NORETURN;
525static void syntax_error(const char *const message)
526{
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528}
529
530
531
/*
 * Hash a NUL-terminated name: for each character c the running value h
 * becomes c + h*64 - h. The caller reduces the result modulo its bucket
 * count.
 */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;

	for (; *name != '\0'; name++)
		h = *name + (h << 6) - h;

	return h;
}
539
540
541static xhash *hash_init(void)
542{
543 xhash *newhash;
544
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549 return newhash;
550}
551
552
553static void *hash_search(xhash *hash, const char *name)
554{
555 hash_item *hi;
556
557 hi = hash->items [ hashidx(name) % hash->csize ];
558 while (hi) {
559 if (strcmp(hi->name, name) == 0)
560 return &(hi->data);
561 hi = hi->next;
562 }
563 return NULL;
564}
565
566
567static void hash_rebuild(xhash *hash)
568{
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
571
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
573 return;
574
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
577
578 for (i = 0; i < hash->csize; i++) {
579 hi = hash->items[i];
580 while (hi) {
581 thi = hi;
582 hi = thi->next;
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
585 newitems[idx] = thi;
586 }
587 }
588
589 free(hash->items);
590 hash->csize = newsize;
591 hash->items = newitems;
592}
593
594
595static void *hash_find(xhash *hash, const char *name)
596{
597 hash_item *hi;
598 unsigned idx;
599 int l;
600
601 hi = hash_search(hash, name);
602 if (!hi) {
603 if (++hash->nel / hash->csize > 10)
604 hash_rebuild(hash);
605
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(*hi) + l);
608 strcpy(hi->name, name);
609
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
613 hash->glen += l;
614 }
615 return &(hi->data);
616}
617
/* Typed find-or-create wrappers around hash_find() */
#define findvar(hash, name)     ((var*)     hash_find((hash), (name)))
#define newvar(name)            ((var*)     hash_find(vhash,  (name)))
#define newfile(name)           ((rstream*) hash_find(fdhash, (name)))
#define newfunc(name)           ((func*)    hash_find(fnhash, (name)))
622
623static void hash_remove(xhash *hash, const char *name)
624{
625 hash_item *hi, **phi;
626
627 phi = &(hash->items[hashidx(name) % hash->csize]);
628 while (*phi) {
629 hi = *phi;
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
632 hash->nel--;
633 *phi = hi->next;
634 free(hi);
635 break;
636 }
637 phi = &(hi->next);
638 }
639}
640
641
642
643static void skip_spaces(char **s)
644{
645 char *p = *s;
646
647 while (1) {
648 if (*p == '\\' && p[1] == '\n') {
649 p++;
650 t_lineno++;
651 } else if (*p != ' ' && *p != '\t') {
652 break;
653 }
654 p++;
655 }
656 *s = p;
657}
658
/*
 * Return the NUL-terminated word at *s and move *s to the byte that
 * follows the word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;

	return word;
}
667
/*
 * Fetch the next character from *s, decoding a backslash escape with
 * bb_process_escape_sequence(). If the decoder returns a backslash
 * without consuming anything, the character after the backslash is
 * returned instead. *s is advanced past everything consumed.
 */
static char nextchar(char **s)
{
	char ch = **s;
	char *after;

	(*s)++;
	after = *s;
	if (ch == '\\') {
		ch = bb_process_escape_sequence((const char**)s);
		if (ch == '\\' && *s == after) {
			ch = **s;
			(*s)++;
		}
	}
	return ch;
}
678
679static ALWAYS_INLINE int isalnum_(int c)
680{
681 return (isalnum(c) || c == '_');
682}
683
/*
 * Parse a number at *pp and advance *pp past the characters consumed.
 *
 * With ENABLE_DESKTOP, text starting with "0x"/"0X" or with '0' followed
 * by a digit is first tried as a hex/octal integer via strtoull(..., 0).
 * That result is used only if it is hex, or if parsing did not stop at a
 * digit or '.'. Otherwise the text is a decimal with leading zeros
 * (e.g. "08", "009.5", "00.25") and is parsed again by strtod() so that
 * it is neither truncated nor left half-consumed.
 */
static double my_strtod(char **pp)
{
	char *cp = *pp;
#if ENABLE_DESKTOP
	if (cp[0] == '0') {
		char c = (cp[1] | 0x20);

		if (c == 'x' || isdigit((unsigned char)cp[1])) {
			unsigned long long ull = strtoull(cp, pp, 0);

			if (c == 'x')
				return ull;
			c = **pp;
			if (!isdigit((unsigned char)c) && c != '.')
				return ull;
			/* not a plain octal integer: reparse as floating point */
		}
	}
#endif
	return strtod(cp, pp);
}
695
696
697
698static xhash *iamarray(var *v)
699{
700 var *a = v;
701
702 while (a->type & VF_CHILD)
703 a = a->x.parent;
704
705 if (!(a->type & VF_ARRAY)) {
706 a->type |= VF_ARRAY;
707 a->x.array = hash_init();
708 }
709 return a->x.array;
710}
711
712static void clear_array(xhash *array)
713{
714 unsigned i;
715 hash_item *hi, *thi;
716
717 for (i = 0; i < array->csize; i++) {
718 hi = array->items[i];
719 while (hi) {
720 thi = hi;
721 hi = hi->next;
722 free(thi->data.v.string);
723 free(thi);
724 }
725 array->items[i] = NULL;
726 }
727 array->glen = array->nel = 0;
728}
729
730
731static var *clrvar(var *v)
732{
733 if (!(v->type & VF_FSTR))
734 free(v->string);
735
736 v->type &= VF_DONTTOUCH;
737 v->type |= VF_DIRTY;
738 v->string = NULL;
739 return v;
740}
741
742
743static var *setvar_p(var *v, char *value)
744{
745 clrvar(v);
746 v->string = value;
747 handle_special(v);
748 return v;
749}
750
751
752static var *setvar_s(var *v, const char *value)
753{
754 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
755}
756
757
758static var *setvar_u(var *v, const char *value)
759{
760 setvar_s(v, value);
761 v->type |= VF_USER;
762 return v;
763}
764
765
766static void setari_u(var *a, int idx, const char *s)
767{
768 char sidx[sizeof(int)*3 + 1];
769 var *v;
770
771 sprintf(sidx, "%d", idx);
772 v = findvar(iamarray(a), sidx);
773 setvar_u(v, s);
774}
775
776
777static var *setvar_i(var *v, double value)
778{
779 clrvar(v);
780 v->type |= VF_NUMBER;
781 v->number = value;
782 handle_special(v);
783 return v;
784}
785
786static const char *getvar_s(var *v)
787{
788
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791 v->string = xstrdup(g_buf);
792 v->type |= VF_CACHED;
793 }
794 return (v->string == NULL) ? "" : v->string;
795}
796
797static double getvar_i(var *v)
798{
799 char *s;
800
801 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802 v->number = 0;
803 s = v->string;
804 if (s && *s) {
805 v->number = my_strtod(&s);
806 if (v->type & VF_USER) {
807 skip_spaces(&s);
808 if (*s != '\0')
809 v->type &= ~VF_USER;
810 }
811 } else {
812 v->type &= ~VF_USER;
813 }
814 v->type |= VF_CACHED;
815 }
816 return v->number;
817}
818
819
820static unsigned long getvar_i_int(var *v)
821{
822 double d = getvar_i(v);
823
824
825
826 if (d >= 0)
827 return (unsigned long)d;
828
829 return - (long) (unsigned long) (-d);
830}
831
832static var *copyvar(var *dest, const var *src)
833{
834 if (dest != src) {
835 clrvar(dest);
836 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837 dest->number = src->number;
838 if (src->string)
839 dest->string = xstrdup(src->string);
840 }
841 handle_special(dest);
842 return dest;
843}
844
845static var *incvar(var *v)
846{
847 return setvar_i(v, getvar_i(v) + 1.);
848}
849
850
851static int is_numeric(var *v)
852{
853 getvar_i(v);
854 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855}
856
857
858static int istrue(var *v)
859{
860 if (is_numeric(v))
861 return (v->number == 0) ? 0 : 1;
862 return (v->string && *(v->string)) ? 1 : 0;
863}
864
865
866static var *nvalloc(int n)
867{
868 nvblock *pb = NULL;
869 var *v, *r;
870 int size;
871
872 while (g_cb) {
873 pb = g_cb;
874 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
875 g_cb = g_cb->next;
876 }
877
878 if (!g_cb) {
879 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
880 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
881 g_cb->size = size;
882 g_cb->pos = g_cb->nv;
883 g_cb->prev = pb;
884
885 if (pb) pb->next = g_cb;
886 }
887
888 v = r = g_cb->pos;
889 g_cb->pos += n;
890
891 while (v < g_cb->pos) {
892 v->type = 0;
893 v->string = NULL;
894 v++;
895 }
896
897 return r;
898}
899
900static void nvfree(var *v)
901{
902 var *p;
903
904 if (v < g_cb->nv || v >= g_cb->pos)
905 syntax_error(EMSG_INTERNAL_ERROR);
906
907 for (p = v; p < g_cb->pos; p++) {
908 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
909 clear_array(iamarray(p));
910 free(p->x.array->items);
911 free(p->x.array);
912 }
913 if (p->type & VF_WALK)
914 free(p->x.walker);
915
916 clrvar(p);
917 }
918
919 g_cb->pos = v;
920 while (g_cb->prev && g_cb->pos == g_cb->nv) {
921 g_cb = g_cb->prev;
922 }
923}
924
925
926
927
928
929
930static uint32_t next_token(uint32_t expected)
931{
932#define concat_inserted (G.next_token__concat_inserted)
933#define save_tclass (G.next_token__save_tclass)
934#define save_info (G.next_token__save_info)
935
936#define ltclass (G.next_token__ltclass)
937
938 char *p, *pp, *s;
939 const char *tl;
940 uint32_t tc;
941 const uint32_t *ti;
942 int l;
943
944 if (t_rollback) {
945 t_rollback = FALSE;
946
947 } else if (concat_inserted) {
948 concat_inserted = FALSE;
949 t_tclass = save_tclass;
950 t_info = save_info;
951
952 } else {
953 p = g_pos;
954 readnext:
955 skip_spaces(&p);
956 g_lineno = t_lineno;
957 if (*p == '#')
958 while (*p != '\n' && *p != '\0')
959 p++;
960
961 if (*p == '\n')
962 t_lineno++;
963
964 if (*p == '\0') {
965 tc = TC_EOF;
966
967 } else if (*p == '\"') {
968
969 t_string = s = ++p;
970 while (*p != '\"') {
971 if (*p == '\0' || *p == '\n')
972 syntax_error(EMSG_UNEXP_EOS);
973 *(s++) = nextchar(&p);
974 }
975 p++;
976 *s = '\0';
977 tc = TC_STRING;
978
979 } else if ((expected & TC_REGEXP) && *p == '/') {
980
981 t_string = s = ++p;
982 while (*p != '/') {
983 if (*p == '\0' || *p == '\n')
984 syntax_error(EMSG_UNEXP_EOS);
985 *s = *p++;
986 if (*s++ == '\\') {
987 pp = p;
988 *(s-1) = bb_process_escape_sequence((const char **)&p);
989 if (*pp == '\\')
990 *s++ = '\\';
991 if (p == pp)
992 *s++ = *p++;
993 }
994 }
995 p++;
996 *s = '\0';
997 tc = TC_REGEXP;
998
999 } else if (*p == '.' || isdigit(*p)) {
1000
1001 t_double = my_strtod(&p);
1002 if (*p == '.')
1003 syntax_error(EMSG_UNEXP_TOKEN);
1004 tc = TC_NUMBER;
1005
1006 } else {
1007
1008 tl = tokenlist;
1009 tc = 0x00000001;
1010 ti = tokeninfo;
1011 while (*tl) {
1012 l = *(tl++);
1013 if (l == NTCC) {
1014 tc <<= 1;
1015 continue;
1016 }
1017
1018
1019
1020
1021 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1022 && *tl == *p && strncmp(p, tl, l) == 0
1023 && !((tc & TC_WORD) && isalnum_(p[l]))
1024 ) {
1025 t_info = *ti;
1026 p += l;
1027 break;
1028 }
1029 ti++;
1030 tl += l;
1031 }
1032
1033 if (!*tl) {
1034
1035
1036
1037 if (!isalnum_(*p))
1038 syntax_error(EMSG_UNEXP_TOKEN);
1039
1040 t_string = --p;
1041 while (isalnum_(*(++p))) {
1042 *(p-1) = *p;
1043 }
1044 *(p-1) = '\0';
1045 tc = TC_VARIABLE;
1046
1047 if (!(expected & TC_VARIABLE))
1048 skip_spaces(&p);
1049 if (*p == '(') {
1050 tc = TC_FUNCTION;
1051 } else {
1052 if (*p == '[') {
1053 p++;
1054 tc = TC_ARRAY;
1055 }
1056 }
1057 }
1058 }
1059 g_pos = p;
1060
1061
1062 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1063 goto readnext;
1064
1065
1066 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1067 concat_inserted = TRUE;
1068 save_tclass = tc;
1069 save_info = t_info;
1070 tc = TC_BINOP;
1071 t_info = OC_CONCAT | SS | P(35);
1072 }
1073
1074 t_tclass = tc;
1075 }
1076 ltclass = t_tclass;
1077
1078
1079 if (!(ltclass & expected))
1080 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1081 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1082
1083 return ltclass;
1084#undef concat_inserted
1085#undef save_tclass
1086#undef save_info
1087#undef ltclass
1088}
1089
1090static void rollback_token(void)
1091{
1092 t_rollback = TRUE;
1093}
1094
1095static node *new_node(uint32_t info)
1096{
1097 node *n;
1098
1099 n = xzalloc(sizeof(node));
1100 n->info = info;
1101 n->lineno = g_lineno;
1102 return n;
1103}
1104
1105static node *mk_re_node(const char *s, node *n, regex_t *re)
1106{
1107 n->info = OC_REGEXP;
1108 n->l.re = re;
1109 n->r.ire = re + 1;
1110 xregcomp(re, s, REG_EXTENDED);
1111 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1112
1113 return n;
1114}
1115
1116static node *condition(void)
1117{
1118 next_token(TC_SEQSTART);
1119 return parse_expr(TC_SEQTERM);
1120}
1121
1122
1123
1124static node *parse_expr(uint32_t iexp)
1125{
1126 node sn;
1127 node *cn = &sn;
1128 node *vn, *glptr;
1129 uint32_t tc, xtc;
1130 var *v;
1131
1132 sn.info = PRIMASK;
1133 sn.r.n = glptr = NULL;
1134 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1135
1136 while (!((tc = next_token(xtc)) & iexp)) {
1137 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1138
1139 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1140 cn->a.n = glptr;
1141 xtc = TC_OPERAND | TC_UOPPRE;
1142 glptr = NULL;
1143
1144 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1145
1146
1147 vn = cn;
1148 while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1149 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
1150 vn = vn->a.n;
1151 if ((t_info & OPCLSMASK) == OC_TERNARY)
1152 t_info += P(6);
1153 cn = vn->a.n->r.n = new_node(t_info);
1154 cn->a.n = vn->a.n;
1155 if (tc & TC_BINOP) {
1156 cn->l.n = vn;
1157 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1158 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1159
1160 next_token(TC_GETLINE);
1161
1162 cn->info &= ~PRIMASK;
1163 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1164 }
1165 } else {
1166 cn->r.n = vn;
1167 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1168 }
1169 vn->a.n = cn;
1170
1171 } else {
1172
1173
1174 vn = cn;
1175 cn = vn->r.n = new_node(t_info);
1176 cn->a.n = vn;
1177 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1178 if (tc & (TC_OPERAND | TC_REGEXP)) {
1179 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1180
1181
1182 switch (tc) {
1183 case TC_VARIABLE:
1184 case TC_ARRAY:
1185 cn->info = OC_VAR;
1186 v = hash_search(ahash, t_string);
1187 if (v != NULL) {
1188 cn->info = OC_FNARG;
1189 cn->l.i = v->x.aidx;
1190 } else {
1191 cn->l.v = newvar(t_string);
1192 }
1193 if (tc & TC_ARRAY) {
1194 cn->info |= xS;
1195 cn->r.n = parse_expr(TC_ARRTERM);
1196 }
1197 break;
1198
1199 case TC_NUMBER:
1200 case TC_STRING:
1201 cn->info = OC_VAR;
1202 v = cn->l.v = xzalloc(sizeof(var));
1203 if (tc & TC_NUMBER)
1204 setvar_i(v, t_double);
1205 else
1206 setvar_s(v, t_string);
1207 break;
1208
1209 case TC_REGEXP:
1210 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1211 break;
1212
1213 case TC_FUNCTION:
1214 cn->info = OC_FUNC;
1215 cn->r.f = newfunc(t_string);
1216 cn->l.n = condition();
1217 break;
1218
1219 case TC_SEQSTART:
1220 cn = vn->r.n = parse_expr(TC_SEQTERM);
1221 cn->a.n = vn;
1222 break;
1223
1224 case TC_GETLINE:
1225 glptr = cn;
1226 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1227 break;
1228
1229 case TC_BUILTIN:
1230 cn->l.n = condition();
1231 break;
1232 }
1233 }
1234 }
1235 }
1236 return sn.r.n;
1237}
1238
1239
1240static node *chain_node(uint32_t info)
1241{
1242 node *n;
1243
1244 if (!seq->first)
1245 seq->first = seq->last = new_node(0);
1246
1247 if (seq->programname != g_progname) {
1248 seq->programname = g_progname;
1249 n = chain_node(OC_NEWSOURCE);
1250 n->l.s = xstrdup(g_progname);
1251 }
1252
1253 n = seq->last;
1254 n->info = info;
1255 seq->last = n->a.n = new_node(OC_DONE);
1256
1257 return n;
1258}
1259
1260static void chain_expr(uint32_t info)
1261{
1262 node *n;
1263
1264 n = chain_node(info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266 if (t_tclass & TC_GRPTERM)
1267 rollback_token();
1268}
1269
1270static node *chain_loop(node *nn)
1271{
1272 node *n, *n2, *save_brk, *save_cont;
1273
1274 save_brk = break_ptr;
1275 save_cont = continue_ptr;
1276
1277 n = chain_node(OC_BR | Vx);
1278 continue_ptr = new_node(OC_EXEC);
1279 break_ptr = new_node(OC_EXEC);
1280 chain_group();
1281 n2 = chain_node(OC_EXEC | Vx);
1282 n2->l.n = nn;
1283 n2->a.n = n;
1284 continue_ptr->a.n = n2;
1285 break_ptr->a.n = n->r.n = seq->last;
1286
1287 continue_ptr = save_cont;
1288 break_ptr = save_brk;
1289
1290 return n;
1291}
1292
1293
1294static void chain_group(void)
1295{
1296 uint32_t c;
1297 node *n, *n2, *n3;
1298
1299 do {
1300 c = next_token(TC_GRPSEQ);
1301 } while (c & TC_NEWLINE);
1302
1303 if (c & TC_GRPSTART) {
1304 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305 if (t_tclass & TC_NEWLINE) continue;
1306 rollback_token();
1307 chain_group();
1308 }
1309 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1310 rollback_token();
1311 chain_expr(OC_EXEC | Vx);
1312 } else {
1313 switch (t_info & OPCLSMASK) {
1314 case ST_IF:
1315 n = chain_node(OC_BR | Vx);
1316 n->l.n = condition();
1317 chain_group();
1318 n2 = chain_node(OC_EXEC);
1319 n->r.n = seq->last;
1320 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1321 chain_group();
1322 n2->a.n = seq->last;
1323 } else {
1324 rollback_token();
1325 }
1326 break;
1327
1328 case ST_WHILE:
1329 n2 = condition();
1330 n = chain_loop(NULL);
1331 n->l.n = n2;
1332 break;
1333
1334 case ST_DO:
1335 n2 = chain_node(OC_EXEC);
1336 n = chain_loop(NULL);
1337 n2->a.n = n->a.n;
1338 next_token(TC_WHILE);
1339 n->l.n = condition();
1340 break;
1341
1342 case ST_FOR:
1343 next_token(TC_SEQSTART);
1344 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1345 if (t_tclass & TC_SEQTERM) {
1346 if ((n2->info & OPCLSMASK) != OC_IN)
1347 syntax_error(EMSG_UNEXP_TOKEN);
1348 n = chain_node(OC_WALKINIT | VV);
1349 n->l.n = n2->l.n;
1350 n->r.n = n2->r.n;
1351 n = chain_loop(NULL);
1352 n->info = OC_WALKNEXT | Vx;
1353 n->l.n = n2->l.n;
1354 } else {
1355 n = chain_node(OC_EXEC | Vx);
1356 n->l.n = n2;
1357 n2 = parse_expr(TC_SEMICOL);
1358 n3 = parse_expr(TC_SEQTERM);
1359 n = chain_loop(n3);
1360 n->l.n = n2;
1361 if (!n2)
1362 n->info = OC_EXEC;
1363 }
1364 break;
1365
1366 case OC_PRINT:
1367 case OC_PRINTF:
1368 n = chain_node(t_info);
1369 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1370 if (t_tclass & TC_OUTRDR) {
1371 n->info |= t_info;
1372 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1373 }
1374 if (t_tclass & TC_GRPTERM)
1375 rollback_token();
1376 break;
1377
1378 case OC_BREAK:
1379 n = chain_node(OC_EXEC);
1380 n->a.n = break_ptr;
1381 break;
1382
1383 case OC_CONTINUE:
1384 n = chain_node(OC_EXEC);
1385 n->a.n = continue_ptr;
1386 break;
1387
1388
1389 default:
1390 chain_expr(t_info);
1391 }
1392 }
1393}
1394
1395static void parse_program(char *p)
1396{
1397 uint32_t tclass;
1398 node *cn;
1399 func *f;
1400 var *v;
1401
1402 g_pos = p;
1403 t_lineno = 1;
1404 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1405 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1406
1407 if (tclass & TC_OPTERM)
1408 continue;
1409
1410 seq = &mainseq;
1411 if (tclass & TC_BEGIN) {
1412 seq = &beginseq;
1413 chain_group();
1414
1415 } else if (tclass & TC_END) {
1416 seq = &endseq;
1417 chain_group();
1418
1419 } else if (tclass & TC_FUNCDECL) {
1420 next_token(TC_FUNCTION);
1421 g_pos++;
1422 f = newfunc(t_string);
1423 f->body.first = NULL;
1424 f->nargs = 0;
1425 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1426 v = findvar(ahash, t_string);
1427 v->x.aidx = (f->nargs)++;
1428
1429 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1430 break;
1431 }
1432 seq = &(f->body);
1433 chain_group();
1434 clear_array(ahash);
1435
1436 } else if (tclass & TC_OPSEQ) {
1437 rollback_token();
1438 cn = chain_node(OC_TEST);
1439 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1440 if (t_tclass & TC_GRPSTART) {
1441 rollback_token();
1442 chain_group();
1443 } else {
1444 chain_node(OC_PRINT);
1445 }
1446 cn->r.n = mainseq.last;
1447
1448 } else {
1449 rollback_token();
1450 chain_group();
1451 }
1452 }
1453}
1454
1455
1456
1457
1458static node *mk_splitter(const char *s, tsplitter *spl)
1459{
1460 regex_t *re, *ire;
1461 node *n;
1462
1463 re = &spl->re[0];
1464 ire = &spl->re[1];
1465 n = &spl->n;
1466 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1467 regfree(re);
1468 regfree(ire);
1469 }
1470 if (strlen(s) > 1) {
1471 mk_re_node(s, n, re);
1472 } else {
1473 n->info = (uint32_t) *s;
1474 }
1475
1476 return n;
1477}
1478
1479
1480
1481
1482
1483static regex_t *as_regex(node *op, regex_t *preg)
1484{
1485 int cflags;
1486 var *v;
1487 const char *s;
1488
1489 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1490 return icase ? op->r.ire : op->l.re;
1491 }
1492 v = nvalloc(1);
1493 s = getvar_s(evaluate(op, v));
1494
1495 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1496
1497
1498
1499
1500
1501 if (regcomp(preg, s, cflags)) {
1502 cflags &= ~REG_EXTENDED;
1503 xregcomp(preg, s, cflags);
1504 }
1505 nvfree(v);
1506 return preg;
1507}
1508
1509
/*
 * Make sure the buffer *b can hold more than n bytes.
 *
 * Nothing happens when *b is already allocated and n < *size.  Otherwise
 * the buffer is resized to n + n/2 + 80 bytes and *size is updated.
 * *size is not read while *b is NULL.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1517
1518
1519static void fsrealloc(int size)
1520{
1521 int i;
1522
1523 if (size >= maxfields) {
1524 i = maxfields;
1525 maxfields = size + 16;
1526 Fields = xrealloc(Fields, maxfields * sizeof(var));
1527 for (; i < maxfields; i++) {
1528 Fields[i].type = VF_SPECIAL;
1529 Fields[i].string = NULL;
1530 }
1531 }
1532
1533 if (size < nfields) {
1534 for (i = size; i < nfields; i++) {
1535 clrvar(Fields + i);
1536 }
1537 }
1538 nfields = size;
1539}
1540
1541static int awk_split(const char *s, node *spl, char **slist)
1542{
1543 int l, n = 0;
1544 char c[4];
1545 char *s1;
1546 regmatch_t pmatch[2];
1547
1548
1549 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1550 strcpy(s1, s);
1551
1552 c[0] = c[1] = (char)spl->info;
1553 c[2] = c[3] = '\0';
1554 if (*getvar_s(intvar[RS]) == '\0')
1555 c[2] = '\n';
1556
1557 if ((spl->info & OPCLSMASK) == OC_REGEXP) {
1558 if (!*s)
1559 return n;
1560 n++;
1561 do {
1562 l = strcspn(s, c+2);
1563 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1564 && pmatch[0].rm_so <= l
1565 ) {
1566 l = pmatch[0].rm_so;
1567 if (pmatch[0].rm_eo == 0) {
1568 l++;
1569 pmatch[0].rm_eo++;
1570 }
1571 n++;
1572 } else {
1573 pmatch[0].rm_eo = l;
1574 if (s[l])
1575 pmatch[0].rm_eo++;
1576 }
1577 memcpy(s1, s, l);
1578
1579 do {
1580 s1[l] = '\0';
1581 } while (++l < pmatch[0].rm_eo);
1582 nextword(&s1);
1583 s += pmatch[0].rm_eo;
1584 } while (*s);
1585 return n;
1586 }
1587 if (c[0] == '\0') {
1588 while (*s) {
1589 *s1++ = *s++;
1590 *s1++ = '\0';
1591 n++;
1592 }
1593 return n;
1594 }
1595 if (c[0] != ' ') {
1596 if (icase) {
1597 c[0] = toupper(c[0]);
1598 c[1] = tolower(c[1]);
1599 }
1600 if (*s1) n++;
1601 while ((s1 = strpbrk(s1, c))) {
1602 *s1++ = '\0';
1603 n++;
1604 }
1605 return n;
1606 }
1607
1608 while (*s) {
1609 s = skip_whitespace(s);
1610 if (!*s) break;
1611 n++;
1612 while (*s && !isspace(*s))
1613 *s1++ = *s++;
1614 *s1++ = '\0';
1615 }
1616 return n;
1617}
1618
1619static void split_f0(void)
1620{
1621
1622#define fstrings (G.split_f0__fstrings)
1623
1624 int i, n;
1625 char *s;
1626
1627 if (is_f0_split)
1628 return;
1629
1630 is_f0_split = TRUE;
1631 free(fstrings);
1632 fsrealloc(0);
1633 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1634 fsrealloc(n);
1635 s = fstrings;
1636 for (i = 0; i < n; i++) {
1637 Fields[i].string = nextword(&s);
1638 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1639 }
1640
1641
1642 clrvar(intvar[NF]);
1643 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1644 intvar[NF]->number = nfields;
1645#undef fstrings
1646}
1647
1648
1649static void handle_special(var *v)
1650{
1651 int n;
1652 char *b;
1653 const char *sep, *s;
1654 int sl, l, len, i, bsize;
1655
1656 if (!(v->type & VF_SPECIAL))
1657 return;
1658
1659 if (v == intvar[NF]) {
1660 n = (int)getvar_i(v);
1661 fsrealloc(n);
1662
1663
1664 sep = getvar_s(intvar[OFS]);
1665 sl = strlen(sep);
1666 b = NULL;
1667 len = 0;
1668 for (i = 0; i < n; i++) {
1669 s = getvar_s(&Fields[i]);
1670 l = strlen(s);
1671 if (b) {
1672 memcpy(b+len, sep, sl);
1673 len += sl;
1674 }
1675 qrealloc(&b, len+l+sl, &bsize);
1676 memcpy(b+len, s, l);
1677 len += l;
1678 }
1679 if (b)
1680 b[len] = '\0';
1681 setvar_p(intvar[F0], b);
1682 is_f0_split = TRUE;
1683
1684 } else if (v == intvar[F0]) {
1685 is_f0_split = FALSE;
1686
1687 } else if (v == intvar[FS]) {
1688 mk_splitter(getvar_s(v), &fsplitter);
1689
1690 } else if (v == intvar[RS]) {
1691 mk_splitter(getvar_s(v), &rsplitter);
1692
1693 } else if (v == intvar[IGNORECASE]) {
1694 icase = istrue(v);
1695
1696 } else {
1697 n = getvar_i(intvar[NF]);
1698 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1699
1700 }
1701}
1702
1703
1704static node *nextarg(node **pn)
1705{
1706 node *n;
1707
1708 n = *pn;
1709 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1710 *pn = n->r.n;
1711 n = n->l.n;
1712 } else {
1713 *pn = NULL;
1714 }
1715 return n;
1716}
1717
1718static void hashwalk_init(var *v, xhash *array)
1719{
1720 char **w;
1721 hash_item *hi;
1722 unsigned i;
1723
1724 if (v->type & VF_WALK)
1725 free(v->x.walker);
1726
1727 v->type |= VF_WALK;
1728 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1729 w[0] = w[1] = (char *)(w + 2);
1730 for (i = 0; i < array->csize; i++) {
1731 hi = array->items[i];
1732 while (hi) {
1733 strcpy(*w, hi->name);
1734 nextword(w);
1735 hi = hi->next;
1736 }
1737 }
1738}
1739
1740static int hashwalk_next(var *v)
1741{
1742 char **w;
1743
1744 w = v->x.walker;
1745 if (w[1] == w[0])
1746 return FALSE;
1747
1748 setvar_s(v, nextword(w+1));
1749 return TRUE;
1750}
1751
1752
1753static int ptest(node *pattern)
1754{
1755
1756 return istrue(evaluate(pattern, &G.ptest__v));
1757}
1758
1759
1760static int awk_getline(rstream *rsm, var *v)
1761{
1762 char *b;
1763 regmatch_t pmatch[2];
1764 int a, p, pp=0, size;
1765 int fd, so, eo, r, rp;
1766 char c, *m, *s;
1767
1768
1769
1770
1771 fd = fileno(rsm->F);
1772 m = rsm->buffer;
1773 a = rsm->adv;
1774 p = rsm->pos;
1775 size = rsm->size;
1776 c = (char) rsplitter.n.info;
1777 rp = 0;
1778
1779 if (!m) qrealloc(&m, 256, &size);
1780 do {
1781 b = m + a;
1782 so = eo = p;
1783 r = 1;
1784 if (p > 0) {
1785 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1786 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1787 b, 1, pmatch, 0) == 0) {
1788 so = pmatch[0].rm_so;
1789 eo = pmatch[0].rm_eo;
1790 if (b[eo] != '\0')
1791 break;
1792 }
1793 } else if (c != '\0') {
1794 s = strchr(b+pp, c);
1795 if (!s) s = memchr(b+pp, '\0', p - pp);
1796 if (s) {
1797 so = eo = s-b;
1798 eo++;
1799 break;
1800 }
1801 } else {
1802 while (b[rp] == '\n')
1803 rp++;
1804 s = strstr(b+rp, "\n\n");
1805 if (s) {
1806 so = eo = s-b;
1807 while (b[eo] == '\n') eo++;
1808 if (b[eo] != '\0')
1809 break;
1810 }
1811 }
1812 }
1813
1814 if (a > 0) {
1815 memmove(m, (const void *)(m+a), p+1);
1816 b = m;
1817 a = 0;
1818 }
1819
1820 qrealloc(&m, a+p+128, &size);
1821 b = m + a;
1822 pp = p;
1823 p += safe_read(fd, b+p, size-p-1);
1824 if (p < pp) {
1825 p = 0;
1826 r = 0;
1827 setvar_i(intvar[ERRNO], errno);
1828 }
1829 b[p] = '\0';
1830
1831 } while (p > pp);
1832
1833 if (p == 0) {
1834 r--;
1835 } else {
1836 c = b[so]; b[so] = '\0';
1837 setvar_s(v, b+rp);
1838 v->type |= VF_USER;
1839 b[so] = c;
1840 c = b[eo]; b[eo] = '\0';
1841 setvar_s(intvar[RT], b+so);
1842 b[eo] = c;
1843 }
1844
1845 rsm->buffer = m;
1846 rsm->adv = a + eo;
1847 rsm->pos = p - eo;
1848 rsm->size = size;
1849
1850 return r;
1851}
1852
1853static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1854{
1855 int r = 0;
1856 char c;
1857 const char *s = format;
1858
1859 if (int_as_int && n == (int)n) {
1860 r = snprintf(b, size, "%d", (int)n);
1861 } else {
1862 do { c = *s; } while (c && *++s);
1863 if (strchr("diouxX", c)) {
1864 r = snprintf(b, size, format, (int)n);
1865 } else if (strchr("eEfgG", c)) {
1866 r = snprintf(b, size, format, n);
1867 } else {
1868 syntax_error(EMSG_INV_FMT);
1869 }
1870 }
1871 return r;
1872}
1873
1874
1875static char *awk_printf(node *n)
1876{
1877 char *b = NULL;
1878 char *fmt, *s, *f;
1879 const char *s1;
1880 int i, j, incr, bsize;
1881 char c, c1;
1882 var *v, *arg;
1883
1884 v = nvalloc(1);
1885 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1886
1887 i = 0;
1888 while (*f) {
1889 s = f;
1890 while (*f && (*f != '%' || *(++f) == '%'))
1891 f++;
1892 while (*f && !isalpha(*f)) {
1893 if (*f == '*')
1894 syntax_error("%*x formats are not supported");
1895 f++;
1896 }
1897
1898 incr = (f - s) + MAXVARFMT;
1899 qrealloc(&b, incr + i, &bsize);
1900 c = *f;
1901 if (c != '\0') f++;
1902 c1 = *f;
1903 *f = '\0';
1904 arg = evaluate(nextarg(&n), v);
1905
1906 j = i;
1907 if (c == 'c' || !c) {
1908 i += sprintf(b+i, s, is_numeric(arg) ?
1909 (char)getvar_i(arg) : *getvar_s(arg));
1910 } else if (c == 's') {
1911 s1 = getvar_s(arg);
1912 qrealloc(&b, incr+i+strlen(s1), &bsize);
1913 i += sprintf(b+i, s, s1);
1914 } else {
1915 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1916 }
1917 *f = c1;
1918
1919
1920 if (i < j) i = j;
1921 }
1922
1923 b = xrealloc(b, i + 1);
1924 free(fmt);
1925 nvfree(v);
1926 b[i] = '\0';
1927 return b;
1928}
1929
1930
1931
1932
1933
1934
1935
1936static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1937{
1938 char *ds = NULL;
1939 const char *s;
1940 const char *sp;
1941 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1942 regmatch_t pmatch[10];
1943 regex_t sreg, *re;
1944
1945 re = as_regex(rn, &sreg);
1946 if (!src) src = intvar[F0];
1947 if (!dest) dest = intvar[F0];
1948
1949 i = di = 0;
1950 sp = getvar_s(src);
1951 rl = strlen(repl);
1952 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1953 so = pmatch[0].rm_so;
1954 eo = pmatch[0].rm_eo;
1955
1956 qrealloc(&ds, di + eo + rl, &dssize);
1957 memcpy(ds + di, sp, eo);
1958 di += eo;
1959 if (++i >= nm) {
1960
1961 di -= (eo - so);
1962 nbs = 0;
1963 for (s = repl; *s; s++) {
1964 ds[di++] = c = *s;
1965 if (c == '\\') {
1966 nbs++;
1967 continue;
1968 }
1969 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1970 di -= ((nbs + 3) >> 1);
1971 j = 0;
1972 if (c != '&') {
1973 j = c - '0';
1974 nbs++;
1975 }
1976 if (nbs % 2) {
1977 ds[di++] = c;
1978 } else {
1979 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1980 qrealloc(&ds, di + rl + n, &dssize);
1981 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1982 di += n;
1983 }
1984 }
1985 nbs = 0;
1986 }
1987 }
1988
1989 sp += eo;
1990 if (i == nm) break;
1991 if (eo == so) {
1992 ds[di] = *sp++;
1993 if (!ds[di++]) break;
1994 }
1995 }
1996
1997 qrealloc(&ds, di + strlen(sp), &dssize);
1998 strcpy(ds + di, sp);
1999 setvar_p(dest, ds);
2000 if (re == &sreg) regfree(re);
2001 return i;
2002}
2003
2004static var *exec_builtin(node *op, var *res)
2005{
2006#define tspl (G.exec_builtin__tspl)
2007
2008 int (*to_xxx)(int);
2009 var *tv;
2010 node *an[4];
2011 var *av[4];
2012 const char *as[4];
2013 regmatch_t pmatch[2];
2014 regex_t sreg, *re;
2015 node *spl;
2016 uint32_t isr, info;
2017 int nargs;
2018 time_t tt;
2019 char *s, *s1;
2020 int i, l, ll, n;
2021
2022 tv = nvalloc(4);
2023 isr = info = op->info;
2024 op = op->l.n;
2025
2026 av[2] = av[3] = NULL;
2027 for (i = 0; i < 4 && op; i++) {
2028 an[i] = nextarg(&op);
2029 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2030 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2031 isr >>= 1;
2032 }
2033
2034 nargs = i;
2035 if ((uint32_t)nargs < (info >> 30))
2036 syntax_error(EMSG_TOO_FEW_ARGS);
2037
2038 switch (info & OPNMASK) {
2039
2040 case B_a2:
2041#if ENABLE_FEATURE_AWK_LIBM
2042 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2043#else
2044 syntax_error(EMSG_NO_MATH);
2045#endif
2046 break;
2047
2048 case B_sp:
2049 if (nargs > 2) {
2050 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2051 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2052 } else {
2053 spl = &fsplitter.n;
2054 }
2055
2056 n = awk_split(as[0], spl, &s);
2057 s1 = s;
2058 clear_array(iamarray(av[1]));
2059 for (i = 1; i <= n; i++)
2060 setari_u(av[1], i, nextword(&s1));
2061 free(s);
2062 setvar_i(res, n);
2063 break;
2064
2065 case B_ss:
2066 l = strlen(as[0]);
2067 i = getvar_i(av[1]) - 1;
2068 if (i > l) i = l;
2069 if (i < 0) i = 0;
2070 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2071 if (n < 0) n = 0;
2072 s = xstrndup(as[0]+i, n);
2073 setvar_p(res, s);
2074 break;
2075
2076
2077
2078 case B_an:
2079 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2080 break;
2081
2082 case B_co:
2083 setvar_i(res, ~getvar_i_int(av[0]));
2084 break;
2085
2086 case B_ls:
2087 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2088 break;
2089
2090 case B_or:
2091 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2092 break;
2093
2094 case B_rs:
2095 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2096 break;
2097
2098 case B_xo:
2099 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2100 break;
2101
2102 case B_lo:
2103 to_xxx = tolower;
2104 goto lo_cont;
2105
2106 case B_up:
2107 to_xxx = toupper;
2108 lo_cont:
2109 s1 = s = xstrdup(as[0]);
2110 while (*s1) {
2111 *s1 = (*to_xxx)(*s1);
2112 s1++;
2113 }
2114 setvar_p(res, s);
2115 break;
2116
2117 case B_ix:
2118 n = 0;
2119 ll = strlen(as[1]);
2120 l = strlen(as[0]) - ll;
2121 if (ll > 0 && l >= 0) {
2122 if (!icase) {
2123 s = strstr(as[0], as[1]);
2124 if (s) n = (s - as[0]) + 1;
2125 } else {
2126
2127
2128
2129 for (i=0; i<=l; i++) {
2130 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2131 n = i+1;
2132 break;
2133 }
2134 }
2135 }
2136 }
2137 setvar_i(res, n);
2138 break;
2139
2140 case B_ti:
2141 if (nargs > 1)
2142 tt = getvar_i(av[1]);
2143 else
2144 time(&tt);
2145
2146 i = strftime(g_buf, MAXVARFMT,
2147 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2148 localtime(&tt));
2149 g_buf[i] = '\0';
2150 setvar_s(res, g_buf);
2151 break;
2152
2153 case B_ma:
2154 re = as_regex(an[1], &sreg);
2155 n = regexec(re, as[0], 1, pmatch, 0);
2156 if (n == 0) {
2157 pmatch[0].rm_so++;
2158 pmatch[0].rm_eo++;
2159 } else {
2160 pmatch[0].rm_so = 0;
2161 pmatch[0].rm_eo = -1;
2162 }
2163 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2164 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2165 setvar_i(res, pmatch[0].rm_so);
2166 if (re == &sreg) regfree(re);
2167 break;
2168
2169 case B_ge:
2170 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2171 break;
2172
2173 case B_gs:
2174 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2175 break;
2176
2177 case B_su:
2178 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2179 break;
2180 }
2181
2182 nvfree(tv);
2183 return res;
2184#undef tspl
2185}
2186
2187
2188
2189
2190
2191#define XC(n) ((n) >> 8)
2192
2193static var *evaluate(node *op, var *res)
2194{
2195
2196#define fnargs (G.evaluate__fnargs)
2197
2198#define seed (G.evaluate__seed)
2199#define sreg (G.evaluate__sreg)
2200
2201 node *op1;
2202 var *v1;
2203 union {
2204 var *v;
2205 const char *s;
2206 double d;
2207 int i;
2208 } L, R;
2209 uint32_t opinfo;
2210 int opn;
2211 union {
2212 char *s;
2213 rstream *rsm;
2214 FILE *F;
2215 var *v;
2216 regex_t *re;
2217 uint32_t info;
2218 } X;
2219
2220 if (!op)
2221 return setvar_s(res, NULL);
2222
2223 v1 = nvalloc(2);
2224
2225 while (op) {
2226 opinfo = op->info;
2227 opn = (opinfo & OPNMASK);
2228 g_lineno = op->lineno;
2229
2230
2231 op1 = op->l.n;
2232 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2233 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2234 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2235 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2236 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2237
2238 switch (XC(opinfo & OPCLSMASK)) {
2239
2240
2241
2242
2243 case XC( OC_TEST ):
2244 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2245
2246 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2247 op->info |= OF_CHECKED;
2248 if (ptest(op1->r.n))
2249 op->info &= ~OF_CHECKED;
2250
2251 op = op->a.n;
2252 } else {
2253 op = op->r.n;
2254 }
2255 } else {
2256 op = (ptest(op1)) ? op->a.n : op->r.n;
2257 }
2258 break;
2259
2260
2261 case XC( OC_EXEC ):
2262 break;
2263
2264
2265 case XC( OC_BR ):
2266 op = istrue(L.v) ? op->a.n : op->r.n;
2267 break;
2268
2269
2270 case XC( OC_WALKINIT ):
2271 hashwalk_init(L.v, iamarray(R.v));
2272 break;
2273
2274
2275 case XC( OC_WALKNEXT ):
2276 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2277 break;
2278
2279 case XC( OC_PRINT ):
2280 case XC( OC_PRINTF ):
2281 X.F = stdout;
2282 if (op->r.n) {
2283 X.rsm = newfile(R.s);
2284 if (!X.rsm->F) {
2285 if (opn == '|') {
2286 X.rsm->F = popen(R.s, "w");
2287 if (X.rsm->F == NULL)
2288 bb_perror_msg_and_die("popen");
2289 X.rsm->is_pipe = 1;
2290 } else {
2291 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2292 }
2293 }
2294 X.F = X.rsm->F;
2295 }
2296
2297 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2298 if (!op1) {
2299 fputs(getvar_s(intvar[F0]), X.F);
2300 } else {
2301 while (op1) {
2302 L.v = evaluate(nextarg(&op1), v1);
2303 if (L.v->type & VF_NUMBER) {
2304 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2305 getvar_i(L.v), TRUE);
2306 fputs(g_buf, X.F);
2307 } else {
2308 fputs(getvar_s(L.v), X.F);
2309 }
2310
2311 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2312 }
2313 }
2314 fputs(getvar_s(intvar[ORS]), X.F);
2315
2316 } else {
2317 L.s = awk_printf(op1);
2318 fputs(L.s, X.F);
2319 free((char*)L.s);
2320 }
2321 fflush(X.F);
2322 break;
2323
2324 case XC( OC_DELETE ):
2325 X.info = op1->info & OPCLSMASK;
2326 if (X.info == OC_VAR) {
2327 R.v = op1->l.v;
2328 } else if (X.info == OC_FNARG) {
2329 R.v = &fnargs[op1->l.i];
2330 } else {
2331 syntax_error(EMSG_NOT_ARRAY);
2332 }
2333
2334 if (op1->r.n) {
2335 clrvar(L.v);
2336 L.s = getvar_s(evaluate(op1->r.n, v1));
2337 hash_remove(iamarray(R.v), L.s);
2338 } else {
2339 clear_array(iamarray(R.v));
2340 }
2341 break;
2342
2343 case XC( OC_NEWSOURCE ):
2344 g_progname = op->l.s;
2345 break;
2346
2347 case XC( OC_RETURN ):
2348 copyvar(res, L.v);
2349 break;
2350
2351 case XC( OC_NEXTFILE ):
2352 nextfile = TRUE;
2353 case XC( OC_NEXT ):
2354 nextrec = TRUE;
2355 case XC( OC_DONE ):
2356 clrvar(res);
2357 break;
2358
2359 case XC( OC_EXIT ):
2360 awk_exit(L.d);
2361
2362
2363
2364 case XC( OC_VAR ):
2365 L.v = op->l.v;
2366 if (L.v == intvar[NF])
2367 split_f0();
2368 goto v_cont;
2369
2370 case XC( OC_FNARG ):
2371 L.v = &fnargs[op->l.i];
2372 v_cont:
2373 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2374 break;
2375
2376 case XC( OC_IN ):
2377 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2378 break;
2379
2380 case XC( OC_REGEXP ):
2381 op1 = op;
2382 L.s = getvar_s(intvar[F0]);
2383 goto re_cont;
2384
2385 case XC( OC_MATCH ):
2386 op1 = op->r.n;
2387 re_cont:
2388 X.re = as_regex(op1, &sreg);
2389 R.i = regexec(X.re, L.s, 0, NULL, 0);
2390 if (X.re == &sreg) regfree(X.re);
2391 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2392 break;
2393
2394 case XC( OC_MOVE ):
2395
2396 if (R.v == v1+1 && R.v->string) {
2397 res = setvar_p(L.v, R.v->string);
2398 R.v->string = NULL;
2399 } else {
2400 res = copyvar(L.v, R.v);
2401 }
2402 break;
2403
2404 case XC( OC_TERNARY ):
2405 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2406 syntax_error(EMSG_POSSIBLE_ERROR);
2407 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2408 break;
2409
2410 case XC( OC_FUNC ):
2411 if (!op->r.f->body.first)
2412 syntax_error(EMSG_UNDEF_FUNC);
2413
2414 X.v = R.v = nvalloc(op->r.f->nargs+1);
2415 while (op1) {
2416 L.v = evaluate(nextarg(&op1), v1);
2417 copyvar(R.v, L.v);
2418 R.v->type |= VF_CHILD;
2419 R.v->x.parent = L.v;
2420 if (++R.v - X.v >= op->r.f->nargs)
2421 break;
2422 }
2423
2424 R.v = fnargs;
2425 fnargs = X.v;
2426
2427 L.s = g_progname;
2428 res = evaluate(op->r.f->body.first, res);
2429 g_progname = L.s;
2430
2431 nvfree(fnargs);
2432 fnargs = R.v;
2433 break;
2434
2435 case XC( OC_GETLINE ):
2436 case XC( OC_PGETLINE ):
2437 if (op1) {
2438 X.rsm = newfile(L.s);
2439 if (!X.rsm->F) {
2440 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2441 X.rsm->F = popen(L.s, "r");
2442 X.rsm->is_pipe = TRUE;
2443 } else {
2444 X.rsm->F = fopen_for_read(L.s);
2445 }
2446 }
2447 } else {
2448 if (!iF) iF = next_input_file();
2449 X.rsm = iF;
2450 }
2451
2452 if (!X.rsm->F) {
2453 setvar_i(intvar[ERRNO], errno);
2454 setvar_i(res, -1);
2455 break;
2456 }
2457
2458 if (!op->r.n)
2459 R.v = intvar[F0];
2460
2461 L.i = awk_getline(X.rsm, R.v);
2462 if (L.i > 0) {
2463 if (!op1) {
2464 incvar(intvar[FNR]);
2465 incvar(intvar[NR]);
2466 }
2467 }
2468 setvar_i(res, L.i);
2469 break;
2470
2471
2472 case XC( OC_FBLTIN ):
2473 switch (opn) {
2474
2475 case F_in:
2476 R.d = (int)L.d;
2477 break;
2478
2479 case F_rn:
2480 R.d = (double)rand() / (double)RAND_MAX;
2481 break;
2482#if ENABLE_FEATURE_AWK_LIBM
2483 case F_co:
2484 R.d = cos(L.d);
2485 break;
2486
2487 case F_ex:
2488 R.d = exp(L.d);
2489 break;
2490
2491 case F_lg:
2492 R.d = log(L.d);
2493 break;
2494
2495 case F_si:
2496 R.d = sin(L.d);
2497 break;
2498
2499 case F_sq:
2500 R.d = sqrt(L.d);
2501 break;
2502#else
2503 case F_co:
2504 case F_ex:
2505 case F_lg:
2506 case F_si:
2507 case F_sq:
2508 syntax_error(EMSG_NO_MATH);
2509 break;
2510#endif
2511 case F_sr:
2512 R.d = (double)seed;
2513 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2514 srand(seed);
2515 break;
2516
2517 case F_ti:
2518 R.d = time(NULL);
2519 break;
2520
2521 case F_le:
2522 if (!op1)
2523 L.s = getvar_s(intvar[F0]);
2524 R.d = strlen(L.s);
2525 break;
2526
2527 case F_sy:
2528 fflush(NULL);
2529 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2530 ? (system(L.s) >> 8) : 0;
2531 break;
2532
2533 case F_ff:
2534 if (!op1)
2535 fflush(stdout);
2536 else {
2537 if (L.s && *L.s) {
2538 X.rsm = newfile(L.s);
2539 fflush(X.rsm->F);
2540 } else {
2541 fflush(NULL);
2542 }
2543 }
2544 break;
2545
2546 case F_cl:
2547 X.rsm = (rstream *)hash_search(fdhash, L.s);
2548 if (X.rsm) {
2549 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2550 free(X.rsm->buffer);
2551 hash_remove(fdhash, L.s);
2552 }
2553 if (R.i != 0)
2554 setvar_i(intvar[ERRNO], errno);
2555 R.d = (double)R.i;
2556 break;
2557 }
2558 setvar_i(res, R.d);
2559 break;
2560
2561 case XC( OC_BUILTIN ):
2562 res = exec_builtin(op, res);
2563 break;
2564
2565 case XC( OC_SPRINTF ):
2566 setvar_p(res, awk_printf(op1));
2567 break;
2568
2569 case XC( OC_UNARY ):
2570 X.v = R.v;
2571 L.d = R.d = getvar_i(R.v);
2572 switch (opn) {
2573 case 'P':
2574 L.d = ++R.d;
2575 goto r_op_change;
2576 case 'p':
2577 R.d++;
2578 goto r_op_change;
2579 case 'M':
2580 L.d = --R.d;
2581 goto r_op_change;
2582 case 'm':
2583 R.d--;
2584 goto r_op_change;
2585 case '!':
2586 L.d = istrue(X.v) ? 0 : 1;
2587 break;
2588 case '-':
2589 L.d = -R.d;
2590 break;
2591 r_op_change:
2592 setvar_i(X.v, R.d);
2593 }
2594 setvar_i(res, L.d);
2595 break;
2596
2597 case XC( OC_FIELD ):
2598 R.i = (int)getvar_i(R.v);
2599 if (R.i == 0) {
2600 res = intvar[F0];
2601 } else {
2602 split_f0();
2603 if (R.i > nfields)
2604 fsrealloc(R.i);
2605 res = &Fields[R.i - 1];
2606 }
2607 break;
2608
2609
2610 case XC( OC_CONCAT ):
2611 case XC( OC_COMMA ):
2612 opn = strlen(L.s) + strlen(R.s) + 2;
2613 X.s = xmalloc(opn);
2614 strcpy(X.s, L.s);
2615 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2616 L.s = getvar_s(intvar[SUBSEP]);
2617 X.s = xrealloc(X.s, opn + strlen(L.s));
2618 strcat(X.s, L.s);
2619 }
2620 strcat(X.s, R.s);
2621 setvar_p(res, X.s);
2622 break;
2623
2624 case XC( OC_LAND ):
2625 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2626 break;
2627
2628 case XC( OC_LOR ):
2629 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2630 break;
2631
2632 case XC( OC_BINARY ):
2633 case XC( OC_REPLACE ):
2634 R.d = getvar_i(R.v);
2635 switch (opn) {
2636 case '+':
2637 L.d += R.d;
2638 break;
2639 case '-':
2640 L.d -= R.d;
2641 break;
2642 case '*':
2643 L.d *= R.d;
2644 break;
2645 case '/':
2646 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2647 L.d /= R.d;
2648 break;
2649 case '&':
2650#if ENABLE_FEATURE_AWK_LIBM
2651 L.d = pow(L.d, R.d);
2652#else
2653 syntax_error(EMSG_NO_MATH);
2654#endif
2655 break;
2656 case '%':
2657 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2658 L.d -= (int)(L.d / R.d) * R.d;
2659 break;
2660 }
2661 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2662 break;
2663
2664 case XC( OC_COMPARE ):
2665 if (is_numeric(L.v) && is_numeric(R.v)) {
2666 L.d = getvar_i(L.v) - getvar_i(R.v);
2667 } else {
2668 L.s = getvar_s(L.v);
2669 R.s = getvar_s(R.v);
2670 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2671 }
2672 switch (opn & 0xfe) {
2673 case 0:
2674 R.i = (L.d > 0);
2675 break;
2676 case 2:
2677 R.i = (L.d >= 0);
2678 break;
2679 case 4:
2680 R.i = (L.d == 0);
2681 break;
2682 }
2683 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2684 break;
2685
2686 default:
2687 syntax_error(EMSG_POSSIBLE_ERROR);
2688 }
2689 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2690 op = op->a.n;
2691 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2692 break;
2693 if (nextrec)
2694 break;
2695 }
2696 nvfree(v1);
2697 return res;
2698#undef fnargs
2699#undef seed
2700#undef sreg
2701}
2702
2703
2704
2705
2706static int awk_exit(int r)
2707{
2708 var tv;
2709 unsigned i;
2710 hash_item *hi;
2711
2712 zero_out_var(&tv);
2713
2714 if (!exiting) {
2715 exiting = TRUE;
2716 nextrec = FALSE;
2717 evaluate(endseq.first, &tv);
2718 }
2719
2720
2721 for (i = 0; i < fdhash->csize; i++) {
2722 hi = fdhash->items[i];
2723 while (hi) {
2724 if (hi->data.rs.F && hi->data.rs.is_pipe)
2725 pclose(hi->data.rs.F);
2726 hi = hi->next;
2727 }
2728 }
2729
2730 exit(r);
2731}
2732
2733
2734
2735static int is_assignment(const char *expr)
2736{
2737 char *exprc, *s, *s0, *s1;
2738
2739 exprc = xstrdup(expr);
2740 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2741 free(exprc);
2742 return FALSE;
2743 }
2744
2745 *(s++) = '\0';
2746 s0 = s1 = s;
2747 while (*s)
2748 *(s1++) = nextchar(&s);
2749
2750 *s1 = '\0';
2751 setvar_u(newvar(exprc), s0);
2752 free(exprc);
2753 return TRUE;
2754}
2755
2756
2757static rstream *next_input_file(void)
2758{
2759#define rsm (G.next_input_file__rsm)
2760#define files_happen (G.next_input_file__files_happen)
2761
2762 FILE *F = NULL;
2763 const char *fname, *ind;
2764
2765 if (rsm.F) fclose(rsm.F);
2766 rsm.F = NULL;
2767 rsm.pos = rsm.adv = 0;
2768
2769 do {
2770 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2771 if (files_happen)
2772 return NULL;
2773 fname = "-";
2774 F = stdin;
2775 } else {
2776 ind = getvar_s(incvar(intvar[ARGIND]));
2777 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2778 if (fname && *fname && !is_assignment(fname))
2779 F = xfopen_stdin(fname);
2780 }
2781 } while (!F);
2782
2783 files_happen = TRUE;
2784 setvar_s(intvar[FILENAME], fname);
2785 rsm.F = F;
2786 return &rsm;
2787#undef rsm
2788#undef files_happen
2789}
2790
2791int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2792int awk_main(int argc, char **argv)
2793{
2794 unsigned opt;
2795 char *opt_F, *opt_W;
2796 llist_t *list_v = NULL;
2797 llist_t *list_f = NULL;
2798 int i, j;
2799 var *v;
2800 var tv;
2801 char **envp;
2802 char *vnames = (char *)vNames;
2803 char *vvalues = (char *)vValues;
2804
2805 INIT_G();
2806
2807
2808
2809 if (ENABLE_LOCALE_SUPPORT)
2810 setlocale(LC_NUMERIC, "C");
2811
2812 zero_out_var(&tv);
2813
2814
2815 g_buf = xmalloc(MAXVARFMT + 1);
2816
2817 vhash = hash_init();
2818 ahash = hash_init();
2819 fdhash = hash_init();
2820 fnhash = hash_init();
2821
2822
2823 for (i = 0; *vnames; i++) {
2824 intvar[i] = v = newvar(nextword(&vnames));
2825 if (*vvalues != '\377')
2826 setvar_s(v, nextword(&vvalues));
2827 else
2828 setvar_i(v, 0);
2829
2830 if (*vnames == '*') {
2831 v->type |= VF_SPECIAL;
2832 vnames++;
2833 }
2834 }
2835
2836 handle_special(intvar[FS]);
2837 handle_special(intvar[RS]);
2838
2839 newfile("/dev/stdin")->F = stdin;
2840 newfile("/dev/stdout")->F = stdout;
2841 newfile("/dev/stderr")->F = stderr;
2842
2843
2844 if (environ) for (envp = environ; *envp; envp++) {
2845
2846 char *s = *envp;
2847 char *s1 = strchr(s, '=');
2848 if (s1) {
2849 *s1 = '\0';
2850
2851
2852 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2853 *s1 = '=';
2854 }
2855 }
2856 opt_complementary = "v::f::";
2857 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2858 argv += optind;
2859 argc -= optind;
2860 if (opt & 0x1)
2861 setvar_s(intvar[FS], opt_F);
2862 while (list_v) {
2863 if (!is_assignment(llist_pop(&list_v)))
2864 bb_show_usage();
2865 }
2866 if (list_f) {
2867 do {
2868 char *s = NULL;
2869 FILE *from_file;
2870
2871 g_progname = llist_pop(&list_f);
2872 from_file = xfopen_stdin(g_progname);
2873
2874 for (i = j = 1; j > 0; i += j) {
2875 s = xrealloc(s, i + 4096);
2876 j = fread(s + i, 1, 4094, from_file);
2877 }
2878 s[i] = '\0';
2879 fclose(from_file);
2880 parse_program(s + 1);
2881 free(s);
2882 } while (list_f);
2883 argc++;
2884 } else {
2885 if (!argc)
2886 bb_show_usage();
2887 g_progname = "cmd. line";
2888 parse_program(*argv++);
2889 }
2890 if (opt & 0x8)
2891 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2892
2893
2894 setvar_i(intvar[ARGC], argc);
2895 setari_u(intvar[ARGV], 0, "awk");
2896 i = 0;
2897 while (*argv)
2898 setari_u(intvar[ARGV], ++i, *argv++);
2899
2900 evaluate(beginseq.first, &tv);
2901 if (!mainseq.first && !endseq.first)
2902 awk_exit(EXIT_SUCCESS);
2903
2904
2905 if (!iF) iF = next_input_file();
2906
2907
2908 while (iF) {
2909 nextfile = FALSE;
2910 setvar_i(intvar[FNR], 0);
2911
2912 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2913 nextrec = FALSE;
2914 incvar(intvar[NR]);
2915 incvar(intvar[FNR]);
2916 evaluate(mainseq.first, &tv);
2917
2918 if (nextfile)
2919 break;
2920 }
2921
2922 if (i < 0)
2923 syntax_error(strerror(errno));
2924
2925 iF = next_input_file();
2926 }
2927
2928 awk_exit(EXIT_SUCCESS);
2929
2930}
2931