1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48#include "libbb.h"
49#include "xregex.h"
50#include <math.h>
51
52
53
54
55
56
/* Debug tracing hooks.
 * The three definitions below expand to an empty statement, so every
 * debug_printf_*() call in this file compiles to nothing and its arguments
 * are not evaluated. Removing one of these three lines activates the
 * corresponding fprintf(stderr, ...) fallback further down.
 */
#define debug_printf_walker(...) do {} while (0)
#define debug_printf_eval(...) do {} while (0)
#define debug_printf_parse(...) do {} while (0)

/* Fallbacks: only used when the matching no-op definition above is absent */
#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#endif
70
71
72
73
74
75
/* Command-line option string. The "e:*" part is only present when
 * IF_FEATURE_AWK_GNU_EXTENSIONS() expands its argument.
 * NOTE(review): presumably consumed by getopt32() outside this chunk - confirm.
 */
#define OPTSTR_AWK "+" \
	"F:v:*f:*" \
	IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
	"W:"
/* Bit positions (OPTBIT_*) and masks (OPT_*) for the options, listed in the
 * same order as the letters in OPTSTR_AWK. OPT_e evaluates to 0 when the GNU
 * extensions are compiled out (the macro expands to nothing, leaving "+ 0").
 */
enum {
	OPTBIT_F,
	OPTBIT_v,
	OPTBIT_f,
	IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,)
	OPTBIT_W,
	OPT_F = 1 << OPTBIT_F,
	OPT_v = 1 << OPTBIT_v,
	OPT_f = 1 << OPTBIT_f,
	OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
	OPT_W = 1 << OPTBIT_W
};
92
/* Size passed to fmt_num() when getvar_s() formats a number into g_buf */
#define MAXVARFMT 240
/* Minimum number of var slots in one nvblock (see nvalloc()) */
#define MINNVBLOCK 64

/* Flags stored in var.type */
#define VF_NUMBER 0x0001	/* var.number holds the value (set by setvar_i) */
#define VF_ARRAY 0x0002	/* var.x.array is a valid hash (set by iamarray) */

#define VF_CACHED 0x0100	/* the other representation (string<->number) has been computed */
#define VF_USER 0x0200	/* set by setvar_u(); cleared by getvar_i() if the string is not fully numeric */
#define VF_SPECIAL 0x0400	/* NOTE(review): presumably marks variables needing handle_special() - confirm */
#define VF_WALK 0x0800	/* var.x.walker owns a walker_list chain (freed in nvfree) */
#define VF_FSTR 0x1000	/* var.string is not owned by the var: clrvar() does not free it */
#define VF_CHILD 0x2000	/* var.x.parent points to the variable actually holding the array */
#define VF_DIRTY 0x4000	/* set by clrvar(), i.e. on every assignment */

/* Flags that clrvar() and copyvar() preserve in the destination variable */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
110
/* One element of the chain hanging off var.x.walker (VF_WALK).
 * Elements are linked through 'prev' and freed one by one in nvfree().
 * NOTE(review): wbuf[1] looks like the start of a variable-length buffer
 * delimited by cur/end; the code that fills it is outside this chunk.
 */
typedef struct walker_list {
	char *end;
	char *cur;
	struct walker_list *prev;
	char wbuf[1];
} walker_list;
117
118
/* An awk variable: scalar value (number and/or string) plus VF_* flags */
typedef struct var_s {
	unsigned type;            /* combination of VF_* flags */
	double number;            /* numeric value, valid per VF_NUMBER / VF_CACHED */
	char *string;             /* string value or NULL (treated as "" by getvar_s) */
	union {
		int aidx;                /* argument index; read for names found in ahash (parse_expr) */
		struct xhash_s *array;   /* element hash, valid when VF_ARRAY is set */
		struct var_s *parent;    /* followed by iamarray() while VF_CHILD is set */
		walker_list *walker;     /* walker chain, valid when VF_WALK is set */
	} x;
} var;
130
131
/* A sequence of statement nodes linked through node.a.n (built by chain_node) */
typedef struct chain_s {
	struct node_s *first;      /* first node of the sequence */
	struct node_s *last;       /* trailing placeholder node; the next statement is written into it */
	const char *programname;   /* g_progname seen by the last chain_node() call on this chain */
} chain;
137
138
/* A user-defined function, stored in fnhash (see newfunc()) */
typedef struct func_s {
	unsigned nargs;        /* NOTE(review): presumably the declared parameter count - set outside this chunk */
	struct chain_s body;   /* statement chain of the function */
} func;
143
144
/* An I/O stream record, stored in fdhash (see newfile()).
 * NOTE(review): the buffer/adv/size/pos fields are not used in this chunk;
 * they look like read-buffer bookkeeping - confirm against the reader code.
 */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	smallint is_pipe;
} rstream;
153
/* One entry of an xhash bucket chain.
 * 'data' is deliberately the first member, so the address of the payload
 * equals the address of the item itself.
 * The item is allocated with extra room after the struct so that 'name'
 * holds the full NUL-terminated key (see hash_find()).
 */
typedef struct hash_item_s {
	union {
		struct var_s v;         /* payload when the hash stores variables */
		struct rstream_s rs;    /* payload when the hash stores streams */
		struct func_s f;        /* payload when the hash stores functions */
	} data;
	struct hash_item_s *next;   /* next item in the same bucket */
	char name[1];               /* key; really strlen(key)+1 bytes */
} hash_item;
163
/* Chained hash table keyed by string */
typedef struct xhash_s {
	unsigned nel;      /* number of stored items */
	unsigned csize;    /* current number of buckets */
	unsigned nprime;   /* index into PRIMES[] of the next bucket count to grow to */
	unsigned glen;     /* sum of (strlen(key) + 1) over all stored items */
	struct hash_item_s **items;   /* bucket array, csize entries */
} xhash;
171
172
/* Parse tree / statement chain node */
typedef struct node_s {
	uint32_t info;       /* opcode class (OPCLSMASK), OF_* flags, priority (PRIMASK), low opcode bits */
	unsigned lineno;     /* g_lineno at the time the node was created */
	union {              /* left operand / primary payload */
		struct node_s *n;
		var *v;              /* variable or constant (OC_VAR) */
		int aidx;            /* function argument index (OC_FNARG) */
		char *new_progname;  /* program name (OC_NEWSOURCE) */
		regex_t *re;         /* case-sensitive regex (OC_REGEXP) */
	} l;
	union {              /* right operand */
		struct node_s *n;
		regex_t *ire;        /* case-insensitive regex (OC_REGEXP) */
		func *f;             /* called function (OC_FUNC) */
	} r;
	union {              /* parent link inside expressions, next-statement link inside chains */
		struct node_s *n;
	} a;
} node;
192
193
/* Block of temporary variables handed out stack-fashion by nvalloc()/nvfree() */
typedef struct nvblock_s {
	int size;                  /* number of slots in nv[] */
	var *pos;                  /* first unused slot */
	struct nvblock_s *prev;    /* previous block in the list */
	struct nvblock_s *next;    /* next block in the list */
	var nv[];                  /* the slots themselves */
} nvblock;
201
/* A node bundled with storage for two compiled regexes.
 * NOTE(review): re[0]/re[1] match the (re, re + 1) pair that mk_re_node()
 * compiles without and with REG_ICASE - confirm at the call sites.
 */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
206
207
208
/* Token classes, one bit each.
 * Bits 0..24 follow the order of the NTC-separated groups in tokenlist[]:
 * next_token() starts with class bit 0 and shifts left at every NTC.
 */
#define TC_SEQSTART (1 << 0)	/* ( */
#define TC_SEQTERM (1 << 1)	/* ) */
#define TC_REGEXP (1 << 2)	/* /.../ */
#define TC_OUTRDR (1 << 3)	/* >> > | */
#define TC_UOPPOST (1 << 4)	/* postfix ++ -- */
#define TC_UOPPRE1 (1 << 5)	/* prefix ++ -- $ */
#define TC_BINOPX (1 << 6)	/* binary operators other than in , | */
#define TC_IN (1 << 7)	/* in */
#define TC_COMMA (1 << 8)	/* , */
#define TC_PIPE (1 << 9)	/* | */
#define TC_UOPPRE2 (1 << 10)	/* prefix + - ! */
#define TC_ARRTERM (1 << 11)	/* ] */
#define TC_GRPSTART (1 << 12)	/* { */
#define TC_GRPTERM (1 << 13)	/* } */
#define TC_SEMICOL (1 << 14)	/* ; */
#define TC_NEWLINE (1 << 15)	/* newline */
#define TC_STATX (1 << 16)	/* statement keywords except while */
#define TC_WHILE (1 << 17)	/* while */
#define TC_ELSE (1 << 18)	/* else */
#define TC_BUILTIN (1 << 19)	/* builtin function names */

/* length and getline have their own classes because parse_expr() treats
 * them specially (optional parenthesis, input redirection) */
#define TC_LENGTH (1 << 20)
#define TC_GETLINE (1 << 21)
#define TC_FUNCDECL (1 << 22)	/* func function */
#define TC_BEGIN (1 << 23)
#define TC_END (1 << 24)
/* The classes below have no tokenlist[] group; next_token() assigns them directly */
#define TC_EOF (1 << 25)
#define TC_VARIABLE (1 << 26)
#define TC_ARRAY (1 << 27)
#define TC_FUNCTION (1 << 28)
#define TC_STRING (1 << 29)
#define TC_NUMBER (1 << 30)

/* Any prefix unary operator */
#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)

/* Any binary operator */
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)

/* Anything that can start an operand */
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
	| TC_BUILTIN | TC_LENGTH | TC_GETLINE \
	| TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
/* Statement terminators */
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)

/* Word tokens: next_token() always tries them and refuses a match that is
 * followed by another identifier character */
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
	| TC_BUILTIN | TC_LENGTH | TC_GETLINE \
	| TC_FUNCDECL | TC_BEGIN | TC_END)

/* After one of these, next_token() skips a following newline */
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
	| TC_BINOP | TC_OPTERM)

/* Tokens that can begin an expression */
#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* Tokens that can begin a statement or group */
#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* next_token() inserts an implicit concatenation operator when the previous
 * token is in TC_CONCAT1, the current one is in TC_CONCAT2 and a binary
 * operator is expected */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
	| TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
277
/* Operand-handling flags stored in node.info / tokeninfo[] entries.
 * NOTE(review): the 1/2 suffixes presumably refer to the left/right operand
 * and RES/STR/NUM to "evaluate" / "fetch as string" / "fetch as number";
 * the evaluator that interprets them is outside this chunk - confirm.
 */
#define OF_RES1 0x010000
#define OF_RES2 0x020000
#define OF_STR1 0x040000
#define OF_STR2 0x080000
#define OF_NUM1 0x100000
#define OF_CHECKED 0x200000
#define OF_REQUIRED 0x400000	/* chain_expr() rejects an empty expression when set */

/* Two-letter shorthands for OF_* combinations, used to keep tokeninfo[]
 * compact. First letter: left operand, second letter: right operand. */
#define xx 0
#define xV OF_RES2
#define xS (OF_RES2 | OF_STR2)
#define Vx OF_RES1
#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED)
#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx (OF_RES1 | OF_STR1)
#define SV (OF_RES1 | OF_STR1 | OF_RES2)
#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* Fields of node.info: opcode class (OC_* / ST_* values) and low opcode bits */
#define OPCLSMASK 0xFF00
#define OPNMASK 0x007F
302
303
304
305
306
/* Operator priority lives in bits 24..30 of node.info.
 * In tokeninfo[] a smaller P() value belongs to a tighter-binding operator
 * (e.g. '*' is P(25), assignment is P(74)).
 * PRIMASK2 ignores the lowest priority bit; parse_expr() applies it to the
 * operator already in the tree when comparing priorities.
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x) (x << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000

/* NOTE(review): these two are not used in this chunk; their values equal
 * OC_WALKINIT and OC_BINARY respectively, which suggests they mark ranges
 * of the opcode enum below - confirm in the evaluator. */
#define SHIFT_TIL_THIS 0x0600
#define RECUR_FROM_THIS 0x1000
318
/* Opcode classes; each value occupies the OPCLSMASK bits of node.info.
 * OC_* are executable operations, ST_* are statement keywords that
 * chain_group() expands into OC_* sequences at parse time.
 */
enum {
	OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,

	OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,

	OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
	OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
	OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
	OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
340
341
/* Sub-opcodes combined with OC_FBLTIN in tokeninfo[]: one-argument builtins
 * (F_in int, F_rn rand, F_co cos, F_ex exp, F_lg log, F_si sin, F_sq sqrt,
 * F_sr srand, F_ti systime, F_le length, F_sy system, F_ff fflush, F_cl close)
 */
enum {
	F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};

/* Sub-opcodes combined with OC_BUILTIN (OC_B) in tokeninfo[]:
 * B_a2 atan2, B_ix index, B_ma match, B_sp split, B_ss substr, B_ti strftime,
 * B_mt mktime, B_lo tolower, B_up toupper, B_ge gensub, B_gs gsub, B_su sub,
 * B_an and, B_co compl, B_ls lshift, B_or or, B_rs rshift, B_xo xor
 */
enum {
	B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
	B_ge, B_gs, B_su,
	B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
353
354
355
/* Token-class separator inside tokenlist[] (string and char form) */
#define NTC "\377"
#define NTCC '\377'

/* All fixed tokens. Each token is stored as a length byte followed by its
 * text; an NTC byte ends a token class. Classes appear in TC_* bit order,
 * and within one class longer tokens that share a prefix come first.
 * tokeninfo[] below has one entry per token, in the same order.
 */
static const char tokenlist[] ALIGN1 =
	"\1(" NTC                                   /* TC_SEQSTART */
	"\1)" NTC                                   /* TC_SEQTERM */
	"\1/" NTC                                   /* TC_REGEXP */
	"\2>>" "\1>" "\1|" NTC                      /* TC_OUTRDR */
	"\2++" "\2--" NTC                           /* TC_UOPPOST */
	"\2++" "\2--" "\1$" NTC                     /* TC_UOPPRE1 */
	"\2==" "\1=" "\2+=" "\2-="                  /* TC_BINOPX */
	"\2*=" "\2/=" "\2%=" "\2^="
	"\1+" "\1-" "\3**=" "\2**"
	"\1/" "\1%" "\1^" "\1*"
	"\2!=" "\2>=" "\2<=" "\1>"
	"\1<" "\2!~" "\1~" "\2&&"
	"\2||" "\1?" "\1:" NTC
	"\2in" NTC                                  /* TC_IN */
	"\1," NTC                                   /* TC_COMMA */
	"\1|" NTC                                   /* TC_PIPE */
	"\1+" "\1-" "\1!" NTC                       /* TC_UOPPRE2 */
	"\1]" NTC                                   /* TC_ARRTERM */
	"\1{" NTC                                   /* TC_GRPSTART */
	"\1}" NTC                                   /* TC_GRPTERM */
	"\1;" NTC                                   /* TC_SEMICOL */
	"\1\n" NTC                                  /* TC_NEWLINE */
	"\2if" "\2do" "\3for" "\5break"             /* TC_STATX */
	"\10continue" "\6delete" "\5print"
	"\6printf" "\4next" "\10nextfile"
	"\6return" "\4exit" NTC
	"\5while" NTC                               /* TC_WHILE */
	"\4else" NTC                                /* TC_ELSE */
	"\3and" "\5compl" "\6lshift" "\2or"         /* TC_BUILTIN */
	"\6rshift" "\3xor"
	"\5close" "\6system" "\6fflush" "\5atan2"
	"\3cos" "\3exp" "\3int" "\3log"
	"\4rand" "\3sin" "\4sqrt" "\5srand"
	"\6gensub" "\4gsub" "\5index"
	"\5match" "\5split" "\7sprintf" "\3sub"
	"\6substr" "\7systime" "\10strftime" "\6mktime"
	"\7tolower" "\7toupper" NTC
	"\6length" NTC                              /* TC_LENGTH */
	"\7getline" NTC                             /* TC_GETLINE */
	"\4func" "\10function" NTC                  /* TC_FUNCDECL */
	"\5BEGIN" NTC                               /* TC_BEGIN */
	"\3END"                                     /* TC_END */
	/* the string literal's own NUL terminates the list */
	;
404
/* Shorthand to keep the builtin rows of tokeninfo[] short */
#define OC_B OC_BUILTIN

/* node.info template for every token of tokenlist[], in the same order
 * (next_token() advances through both tables in lockstep).
 * For operators, P() is the priority and the low bits select the concrete
 * operation. NOTE(review): for OC_B entries the P() field holds values such
 * as 0x83 that are not operator priorities; presumably they encode argument
 * requirements for the builtin - confirm in the evaluator.
 */
static const uint32_t tokeninfo[] = {
	0,                                                  /* ( */
	0,                                                  /* ) */
	OC_REGEXP,                                          /* / */
	xS|'a', xS|'w', xS|'|',                             /* >> > | */
	OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',         /* postfix ++ -- */
	OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),  /* prefix ++ -- $ */
	/* binary operators, same order as in tokenlist[] */
	OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
	OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
	OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
	OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
	OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
	OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
	OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
	OC_IN|SV|P(49),                                     /* in */
	OC_COMMA|SS|P(80),                                  /* , */
	OC_PGETLINE|SV|P(37),                               /* | */
	OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',  /* prefix + - ! */
	0,                                                  /* ] */
	0,                                                  /* { */
	0,                                                  /* } */
	0,                                                  /* ; */
	0,                                                  /* newline */
	/* statement keywords */
	ST_IF, ST_DO, ST_FOR, OC_BREAK,
	OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
	OC_PRINTF, OC_NEXT, OC_NEXTFILE,
	OC_RETURN|Vx, OC_EXIT|Nx,
	ST_WHILE,                                           /* while */
	0,                                                  /* else */
	/* builtin functions */
	OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
	OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
	OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
	OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
	OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
	OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b),
	OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
	OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
	OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
	OC_FBLTIN|Sx|F_le,                                  /* length */
	OC_GETLINE|SV|P(0),                                 /* getline */
	0, 0,                                               /* func function */
	0,                                                  /* BEGIN */
	0                                                   /* END */
};
451
452
453
/* Indexes into intvar[], in the same order as the names in vNames[] */
enum {
	CONVFMT, OFMT, FS, OFS,
	ORS, RS, RT, FILENAME,
	SUBSEP, F0, ARGIND, ARGC,
	ARGV, ERRNO, FNR, NR,
	NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
};

/* Names of the internal variables as consecutive NUL-terminated strings;
 * an empty string ends the list. Some names are prefixed with '*'.
 * NOTE(review): the '*' presumably marks variables that need special
 * handling when assigned (VF_SPECIAL); "$" is the name used for $0 -
 * confirm in the initialisation code outside this chunk.
 */
static const char vNames[] ALIGN1 =
	"CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
	"ORS\0" "RS\0*" "RT\0" "FILENAME\0"
	"SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
	"ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
	"NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";

/* Initial string values for the leading entries of vNames[], in order
 * (CONVFMT, OFMT, FS, OFS, ORS, RS, RT, FILENAME, SUBSEP, $0), followed by
 * a "\377" byte. NOTE(review): "\377" presumably ends the list of
 * string-initialised variables - confirm in the initialisation code.
 */
static const char vValues[] ALIGN1 =
	"%.6g\0" "%.6g\0" " \0" " \0"
	"\n\0" "\n\0" "\0" "\0"
	"\034\0" "\0" "\377";
473
474
/* Bucket counts for xhash: hash_init() starts with FIRST_PRIME buckets and
 * hash_rebuild() steps through PRIMES[] on each growth */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
477
478
479
480
481
482
483
/* First half of the global state. It is reached through G1, i.e. it sits
 * immediately before the struct globals2 that ptr_to_globals points to.
 * Every member has a same-named accessor macro defined further down.
 */
struct globals {
	double t_double;                  /* value of the last TC_NUMBER token */
	chain beginseq, mainseq, endseq;
	chain *seq;                       /* chain that chain_node() currently appends to */
	node *break_ptr, *continue_ptr;   /* jump targets of the innermost loop being parsed */
	rstream *iF;
	xhash *vhash, *ahash, *fdhash, *fnhash;   /* variables, function args, streams, functions */
	const char *g_progname;           /* program name used in syntax_error() messages */
	int g_lineno;                     /* line number used in messages and stored in new nodes */
	int nfields;
	int maxfields;
	var *Fields;
	nvblock *g_cb;                    /* current temporary-variable block (nvalloc/nvfree) */
	char *g_pos;                      /* lexer position in the program text */
	char *g_buf;                      /* scratch buffer (getvar_s formats numbers into it) */
	smallint icase;
	smallint exiting;
	smallint nextrec;
	smallint nextfile;
	smallint is_f0_split;
	smallint t_rollback;              /* set by rollback_token(): next_token() re-delivers the last token */
};
/* Second half of the global state; ptr_to_globals points here (macro G).
 * Members named func__var are the private state of function 'func'.
 */
struct globals2 {
	uint32_t t_info;      /* tokeninfo[] entry of the current token */
	uint32_t t_tclass;    /* class (TC_*) of the current token */
	char *t_string;       /* text of the current string/regexp/name token */
	int t_lineno;         /* line number at the lexer position */

	var *intvar[NUM_INTERNAL_VARS];   /* internal variables, indexed by CONVFMT..ENVIRON */

	/* former static locals of split_f0() */
	char *split_f0__fstrings;

	/* state of next_token() */
	uint32_t next_token__save_tclass;
	uint32_t next_token__save_info;
	uint32_t next_token__ltclass;
	smallint next_token__concat_inserted;

	/* state of next_input_file() */
	smallint next_input_file__files_happen;
	rstream next_input_file__rsm;

	/* state of evaluate() */
	var *evaluate__fnargs;
	unsigned evaluate__seed;
	regex_t evaluate__sreg;

	var ptest__v;

	tsplitter exec_builtin__tspl;

	/* biggest and least used members go last */
	tsplitter fsplitter, rsplitter;
};
/* G1: the struct globals placed right before *ptr_to_globals.
 * G: the struct globals2 that ptr_to_globals points to (see INIT_G). */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)

/* Accessor macros: let the code use the global fields by plain name.
 * Because of them, none of these identifiers can be used as a local
 * variable name anywhere below. */
#define t_double (G1.t_double )
#define beginseq (G1.beginseq )
#define mainseq (G1.mainseq )
#define endseq (G1.endseq )
#define seq (G1.seq )
#define break_ptr (G1.break_ptr )
#define continue_ptr (G1.continue_ptr)
#define iF (G1.iF )
#define vhash (G1.vhash )
#define ahash (G1.ahash )
#define fdhash (G1.fdhash )
#define fnhash (G1.fnhash )
#define g_progname (G1.g_progname )
#define g_lineno (G1.g_lineno )
#define nfields (G1.nfields )
#define maxfields (G1.maxfields )
#define Fields (G1.Fields )
#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
#define g_buf (G1.g_buf )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
#define nextfile (G1.nextfile )
#define is_f0_split (G1.is_f0_split )
#define t_rollback (G1.t_rollback )
#define t_info (G.t_info )
#define t_tclass (G.t_tclass )
#define t_string (G.t_string )
#define t_lineno (G.t_lineno )
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
/* Allocate both global structs in one zeroed chunk, point ptr_to_globals
 * just past the first one, and set the two fields that must not start at
 * zero: the "previous token class" of the lexer and the random seed. */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
	G.next_token__ltclass = TC_OPTERM; \
	G.evaluate__seed = 1; \
} while (0)
581
582
583
/* Forward declarations of functions used before their definition */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;

/* Error message texts shared by several call sites */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
605
606static void zero_out_var(var *vp)
607{
608 memset(vp, 0, sizeof(*vp));
609}
610
611static void syntax_error(const char *message) NORETURN;
612static void syntax_error(const char *message)
613{
614 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
615}
616
617
618
/* String hash used by all xhash tables.
 * Folds each character into the running value as h = h * 63 + c, written as
 * a shift and a subtraction, with unsigned wrap-around.
 * Returns 0 for the empty string. Callers reduce the result modulo the
 * bucket count.
 */
static unsigned hashidx(const char *name)
{
	unsigned h;
	const char *p;

	for (h = 0, p = name; *p != '\0'; p++)
		h = (h << 6) - h + *p;
	return h;
}
627
628
629static xhash *hash_init(void)
630{
631 xhash *newhash;
632
633 newhash = xzalloc(sizeof(*newhash));
634 newhash->csize = FIRST_PRIME;
635 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
636
637 return newhash;
638}
639
640
641static void *hash_search(xhash *hash, const char *name)
642{
643 hash_item *hi;
644
645 hi = hash->items[hashidx(name) % hash->csize];
646 while (hi) {
647 if (strcmp(hi->name, name) == 0)
648 return &hi->data;
649 hi = hi->next;
650 }
651 return NULL;
652}
653
654
655static void hash_rebuild(xhash *hash)
656{
657 unsigned newsize, i, idx;
658 hash_item **newitems, *hi, *thi;
659
660 if (hash->nprime == ARRAY_SIZE(PRIMES))
661 return;
662
663 newsize = PRIMES[hash->nprime++];
664 newitems = xzalloc(newsize * sizeof(newitems[0]));
665
666 for (i = 0; i < hash->csize; i++) {
667 hi = hash->items[i];
668 while (hi) {
669 thi = hi;
670 hi = thi->next;
671 idx = hashidx(thi->name) % newsize;
672 thi->next = newitems[idx];
673 newitems[idx] = thi;
674 }
675 }
676
677 free(hash->items);
678 hash->csize = newsize;
679 hash->items = newitems;
680}
681
682
683static void *hash_find(xhash *hash, const char *name)
684{
685 hash_item *hi;
686 unsigned idx;
687 int l;
688
689 hi = hash_search(hash, name);
690 if (!hi) {
691 if (++hash->nel / hash->csize > 10)
692 hash_rebuild(hash);
693
694 l = strlen(name) + 1;
695 hi = xzalloc(sizeof(*hi) + l);
696 strcpy(hi->name, name);
697
698 idx = hashidx(name) % hash->csize;
699 hi->next = hash->items[idx];
700 hash->items[idx] = hi;
701 hash->glen += l;
702 }
703 return &hi->data;
704}
705
706#define findvar(hash, name) ((var*) hash_find((hash), (name)))
707#define newvar(name) ((var*) hash_find(vhash, (name)))
708#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
709#define newfunc(name) ((func*) hash_find(fnhash, (name)))
710
711static void hash_remove(xhash *hash, const char *name)
712{
713 hash_item *hi, **phi;
714
715 phi = &hash->items[hashidx(name) % hash->csize];
716 while (*phi) {
717 hi = *phi;
718 if (strcmp(hi->name, name) == 0) {
719 hash->glen -= (strlen(name) + 1);
720 hash->nel--;
721 *phi = hi->next;
722 free(hi);
723 break;
724 }
725 phi = &hi->next;
726 }
727}
728
729
730
731static char *skip_spaces(char *p)
732{
733 while (1) {
734 if (*p == '\\' && p[1] == '\n') {
735 p++;
736 t_lineno++;
737 } else if (*p != ' ' && *p != '\t') {
738 break;
739 }
740 p++;
741 }
742 return p;
743}
744
745
/* Iterate over a list of consecutive NUL-terminated strings.
 * Returns the string *s currently points to and advances *s to the byte
 * just past that string's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
753
/* Fetch one logical character from *s, decoding a backslash escape, and
 * advance *s past everything that was consumed.
 *
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * returns '\\' without moving *s (the escape was not recognised), the
 * character after the backslash is returned literally and, unless it is
 * the terminating NUL, consumed as well. This makes \" yield ".
 */
static char nextchar(char **s)
{
	char ch;
	char *after;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) {
		ch = **s;
		if (ch != '\0')
			(*s)++;
	}
	return ch;
}
773
774
775
/* Decode backslash escapes of the NUL-terminated string s1 in place.
 * Characters are read with nextchar() and written back from the start of
 * the buffer, up to and including the terminating NUL.
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char *dst = s1;
	char ch;

	do {
		ch = nextchar(&src);
		*dst++ = ch;
	} while (ch != '\0');
}
782
783static ALWAYS_INLINE int isalnum_(int c)
784{
785 return (isalnum(c) || c == '_');
786}
787
788static double my_strtod(char **pp)
789{
790 char *cp = *pp;
791 if (ENABLE_DESKTOP && cp[0] == '0') {
792
793 char c = (cp[1] | 0x20);
794 if (c == 'x' || isdigit(cp[1])) {
795 unsigned long long ull = strtoull(cp, pp, 0);
796 if (c == 'x')
797 return ull;
798 c = **pp;
799 if (!isdigit(c) && c != '.')
800 return ull;
801
802
803
804
805
806 }
807 }
808 return strtod(cp, pp);
809}
810
811
812
813static xhash *iamarray(var *v)
814{
815 var *a = v;
816
817 while (a->type & VF_CHILD)
818 a = a->x.parent;
819
820 if (!(a->type & VF_ARRAY)) {
821 a->type |= VF_ARRAY;
822 a->x.array = hash_init();
823 }
824 return a->x.array;
825}
826
827static void clear_array(xhash *array)
828{
829 unsigned i;
830 hash_item *hi, *thi;
831
832 for (i = 0; i < array->csize; i++) {
833 hi = array->items[i];
834 while (hi) {
835 thi = hi;
836 hi = hi->next;
837 free(thi->data.v.string);
838 free(thi);
839 }
840 array->items[i] = NULL;
841 }
842 array->glen = array->nel = 0;
843}
844
845
846static var *clrvar(var *v)
847{
848 if (!(v->type & VF_FSTR))
849 free(v->string);
850
851 v->type &= VF_DONTTOUCH;
852 v->type |= VF_DIRTY;
853 v->string = NULL;
854 return v;
855}
856
857
858static var *setvar_p(var *v, char *value)
859{
860 clrvar(v);
861 v->string = value;
862 handle_special(v);
863 return v;
864}
865
866
867static var *setvar_s(var *v, const char *value)
868{
869 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
870}
871
872
873static var *setvar_u(var *v, const char *value)
874{
875 v = setvar_s(v, value);
876 v->type |= VF_USER;
877 return v;
878}
879
880
881static void setari_u(var *a, int idx, const char *s)
882{
883 var *v;
884
885 v = findvar(iamarray(a), itoa(idx));
886 setvar_u(v, s);
887}
888
889
890static var *setvar_i(var *v, double value)
891{
892 clrvar(v);
893 v->type |= VF_NUMBER;
894 v->number = value;
895 handle_special(v);
896 return v;
897}
898
899static const char *getvar_s(var *v)
900{
901
902 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
903 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
904 v->string = xstrdup(g_buf);
905 v->type |= VF_CACHED;
906 }
907 return (v->string == NULL) ? "" : v->string;
908}
909
910static double getvar_i(var *v)
911{
912 char *s;
913
914 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
915 v->number = 0;
916 s = v->string;
917 if (s && *s) {
918 debug_printf_eval("getvar_i: '%s'->", s);
919 v->number = my_strtod(&s);
920 debug_printf_eval("%f (s:'%s')\n", v->number, s);
921 if (v->type & VF_USER) {
922 s = skip_spaces(s);
923 if (*s != '\0')
924 v->type &= ~VF_USER;
925 }
926 } else {
927 debug_printf_eval("getvar_i: '%s'->zero\n", s);
928 v->type &= ~VF_USER;
929 }
930 v->type |= VF_CACHED;
931 }
932 debug_printf_eval("getvar_i: %f\n", v->number);
933 return v->number;
934}
935
936
937static unsigned long getvar_i_int(var *v)
938{
939 double d = getvar_i(v);
940
941
942
943 if (d >= 0)
944 return (unsigned long)d;
945
946 return - (long) (unsigned long) (-d);
947}
948
949static var *copyvar(var *dest, const var *src)
950{
951 if (dest != src) {
952 clrvar(dest);
953 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
954 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
955 dest->number = src->number;
956 if (src->string)
957 dest->string = xstrdup(src->string);
958 }
959 handle_special(dest);
960 return dest;
961}
962
963static var *incvar(var *v)
964{
965 return setvar_i(v, getvar_i(v) + 1.0);
966}
967
968
969static int is_numeric(var *v)
970{
971 getvar_i(v);
972 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
973}
974
975
976static int istrue(var *v)
977{
978 if (is_numeric(v))
979 return (v->number != 0);
980 return (v->string && v->string[0]);
981}
982
983
984static var *nvalloc(int n)
985{
986 nvblock *pb = NULL;
987 var *v, *r;
988 int size;
989
990 while (g_cb) {
991 pb = g_cb;
992 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
993 break;
994 g_cb = g_cb->next;
995 }
996
997 if (!g_cb) {
998 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
999 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
1000 g_cb->size = size;
1001 g_cb->pos = g_cb->nv;
1002 g_cb->prev = pb;
1003
1004 if (pb)
1005 pb->next = g_cb;
1006 }
1007
1008 v = r = g_cb->pos;
1009 g_cb->pos += n;
1010
1011 while (v < g_cb->pos) {
1012 v->type = 0;
1013 v->string = NULL;
1014 v++;
1015 }
1016
1017 return r;
1018}
1019
1020static void nvfree(var *v)
1021{
1022 var *p;
1023
1024 if (v < g_cb->nv || v >= g_cb->pos)
1025 syntax_error(EMSG_INTERNAL_ERROR);
1026
1027 for (p = v; p < g_cb->pos; p++) {
1028 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1029 clear_array(iamarray(p));
1030 free(p->x.array->items);
1031 free(p->x.array);
1032 }
1033 if (p->type & VF_WALK) {
1034 walker_list *n;
1035 walker_list *w = p->x.walker;
1036 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1037 p->x.walker = NULL;
1038 while (w) {
1039 n = w->prev;
1040 debug_printf_walker(" free(%p)\n", w);
1041 free(w);
1042 w = n;
1043 }
1044 }
1045 clrvar(p);
1046 }
1047
1048 g_cb->pos = v;
1049 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1050 g_cb = g_cb->prev;
1051 }
1052}
1053
1054
1055
1056
1057
1058
/* Lexer: read the next token of the program text at g_pos.
 *
 * 'expected' is the set of token classes (TC_*) acceptable at this point.
 * Returns the class of the token, which is also stored in t_tclass.
 * Depending on the class, t_info (tokeninfo[] entry), t_string (text of a
 * string/regexp/name) or t_double (number) describe the token.
 * Dies with a syntax error if the token's class is not in 'expected'.
 *
 * The program text is modified in place: strings and regexps are unescaped
 * where they stand, and names are shifted one byte to the left to make
 * room for a terminating NUL.
 */
static uint32_t next_token(uint32_t expected)
{
/* private state kept in the globals between calls */
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass (G.next_token__save_tclass)
#define save_info (G.next_token__save_info)
/* class of the previously returned token; initialised to TC_OPTERM by INIT_G */
#define ltclass (G.next_token__ltclass)

	char *p, *s;
	const char *tl;
	uint32_t tc;
	const uint32_t *ti;

	if (t_rollback) {
		/* rollback_token() was called: deliver the same token again */
		t_rollback = FALSE;
	} else if (concat_inserted) {
		/* the previous call returned an implicit concatenation operator;
		 * now deliver the real token that was saved then */
		concat_inserted = FALSE;
		t_tclass = save_tclass;
		t_info = save_info;
	} else {
		p = g_pos;
 readnext:
		p = skip_spaces(p);
		g_lineno = t_lineno;
		/* a comment runs up to, but not including, the end of line */
		if (*p == '#')
			while (*p != '\n' && *p != '\0')
				p++;

		if (*p == '\n')
			t_lineno++;

		if (*p == '\0') {
			tc = TC_EOF;
			debug_printf_parse("%s: token found: TC_EOF\n", __func__);
		} else if (*p == '\"') {
			/* string literal: unescape in place, s trails behind p */
			t_string = s = ++p;
			while (*p != '\"') {
				char *pp;
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				pp = p;
				*s++ = nextchar(&pp);
				p = pp;
			}
			p++;
			*s = '\0';
			tc = TC_STRING;
			debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
		} else if ((expected & TC_REGEXP) && *p == '/') {
			/* regexp: '/' starts one only where a regexp is expected */
			t_string = s = ++p;
			while (*p != '/') {
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*s = *p++;
				if (*s++ == '\\') {
					/* try to decode the escape; the result replaces the backslash */
					char *pp = p;
					s[-1] = bb_process_escape_sequence((const char **)&pp);
					/* "\\" stays as two backslashes for the regex compiler */
					if (*p == '\\')
						*s++ = '\\';
					/* nothing consumed: copy the escaped character itself */
					if (pp == p)
						*s++ = *p++;
					else
						p = pp;
				}
			}
			p++;
			*s = '\0';
			tc = TC_REGEXP;
			debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);

		} else if (*p == '.' || isdigit(*p)) {
			/* number */
			char *pp = p;
			t_double = my_strtod(&pp);
			p = pp;
			/* a '.' right after a number (e.g. "1.2.3") is an error */
			if (*p == '.')
				syntax_error(EMSG_UNEXP_TOKEN);
			tc = TC_NUMBER;
			debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
		} else {
			/* search the fixed token table; tc tracks the class of the
			 * current tokenlist[] group, ti the matching tokeninfo[] entry */
			tl = tokenlist;
			tc = 0x00000001;
			ti = tokeninfo;
			while (*tl) {
				int l = (unsigned char) *tl++;
				if (l == (unsigned char) NTCC) {
					/* group separator: move on to the next class bit */
					tc <<= 1;
					continue;
				}
				/* accept if the class is expected (words and newline are
				 * always tried), the text matches, and a word token is
				 * not merely the prefix of a longer identifier */
				if ((tc & (expected | TC_WORD | TC_NEWLINE))
				 && strncmp(p, tl, l) == 0
				 && !((tc & TC_WORD) && isalnum_(p[l]))
				) {
					/* found a known token */
					t_info = *ti;
					debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
					p += l;
					goto token_found;
				}
				ti++;
				tl += l;
			}
			/* not a known token */

			/* must be a name then (variable, array or function) */
			if (!isalnum_(*p))
				syntax_error(EMSG_UNEXP_TOKEN);
			/* shift the name one byte left so it can be NUL-terminated
			 * without overwriting the character that follows it */
			t_string = --p;
			while (isalnum_(*++p)) {
				p[-1] = *p;
			}
			p[-1] = '\0';
			tc = TC_VARIABLE;
			/* blanks before '(' or '[' are skipped unless only a plain
			 * variable (not an array) is expected */
			if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
				p = skip_spaces(p);
			if (*p == '(') {
				/* the '(' itself is left for the next call */
				tc = TC_FUNCTION;
				debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
			} else {
				if (*p == '[') {
					/* the '[' is consumed as part of the array token */
					p++;
					tc = TC_ARRAY;
					debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
				} else
					debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
			}
		}
 token_found:
		g_pos = p;

		/* a newline directly after a TC_NOTERM token is skipped */
		if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
			goto readnext;

		/* two adjacent operands where a binary operator is expected:
		 * return an implicit concatenation operator now and keep the
		 * real token for the next call */
		if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
			concat_inserted = TRUE;
			save_tclass = tc;
			save_info = t_info;
			tc = TC_BINOP;
			t_info = OC_CONCAT | SS | P(35);
		}

		t_tclass = tc;
	}
	ltclass = t_tclass;

	/* is this token acceptable here? */
	if (!(ltclass & expected)) {
		syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
				EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
	}

	return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1227
1228static void rollback_token(void)
1229{
1230 t_rollback = TRUE;
1231}
1232
1233static node *new_node(uint32_t info)
1234{
1235 node *n;
1236
1237 n = xzalloc(sizeof(node));
1238 n->info = info;
1239 n->lineno = g_lineno;
1240 return n;
1241}
1242
1243static void mk_re_node(const char *s, node *n, regex_t *re)
1244{
1245 n->info = OC_REGEXP;
1246 n->l.re = re;
1247 n->r.ire = re + 1;
1248 xregcomp(re, s, REG_EXTENDED);
1249 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1250}
1251
1252static node *condition(void)
1253{
1254 next_token(TC_SEQSTART);
1255 return parse_expr(TC_SEQTERM);
1256}
1257
1258
1259
/* Parse an expression up to (and including) a token of class 'iexp' and
 * return the root of the resulting tree, or NULL for an empty expression.
 *
 * The tree is built incrementally: 'cn' is the node added last, each
 * node's a.n points to its parent, and the stack variable 'sn' is a
 * sentinel root whose r.n ends up as the result. 'xtc' is the set of token
 * classes acceptable next; 'iexp' is part of it only where the expression
 * may legally end.
 */
static node *parse_expr(uint32_t iexp)
{
	node sn;
	node *cn = &sn;
	node *vn, *glptr;
	uint32_t tc, xtc;
	var *v;

	debug_printf_parse("%s(%x)\n", __func__, iexp);

	/* the sentinel carries the largest priority value, so the climb in
	 * the operator branch below always stops at it */
	sn.info = PRIMASK;
	sn.r.n = sn.a.n = glptr = NULL;
	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

	while (!((tc = next_token(xtc)) & iexp)) {

		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
			/* '<' right after getline: input redirection. Its operand
			 * is collected under a concatenation node hung on the
			 * getline node's left side */
			debug_printf_parse("%s: input redir\n", __func__);
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			xtc = TC_OPERAND | TC_UOPPRE;
			glptr = NULL;

		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
			debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
			/* binary or postfix operator: climb towards the root past
			 * every operator that binds tighter (smaller priority
			 * value) than the new one; a ':' also climbs past a node
			 * with identical info */
			vn = cn;
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
			) {
				vn = vn->a.n;
				if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
			}
			/* '?' is compared with P(64) above but stored with P(70) */
			if ((t_info & OPCLSMASK) == OC_TERNARY)
				t_info += P(6);
			/* splice the new node in between vn and vn's parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* "cmd | getline": the getline keyword must follow */
					next_token(TC_GETLINE);
					/* clear the priority field, i.e. bind as tightly as possible */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				/* postfix operator: the operand becomes the right child */
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			debug_printf_parse("%s: other\n", __func__);
			/* operand or prefix operator: becomes the right child of
			 * the node added last */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* a prefix operator (not handled here) keeps the xtc
				 * set above: an operand must follow it */

				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
					cn->info = OC_VAR;
					/* names found in ahash are function arguments and are
					 * referenced by index; all others are variables in vhash */
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.aidx = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* the subscript expression, up to ']', goes to the right */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
					/* constants are stored as anonymous variables */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					debug_printf_parse("%s: TC_REGEXP\n", __func__);
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					debug_printf_parse("%s: TC_FUNCTION\n", __func__);
					/* user function call: the function entry is created on
					 * first reference; the argument list goes to the left */
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					debug_printf_parse("%s: TC_SEQSTART\n", __func__);
					/* parenthesised sub-expression replaces the node just created */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					if (!cn)
						syntax_error("Empty sequence");
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					debug_printf_parse("%s: TC_GETLINE\n", __func__);
					/* remember the node: a following '<' is a redirection */
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					debug_printf_parse("%s: TC_BUILTIN\n", __func__);
					cn->l.n = condition();
					break;

				case TC_LENGTH:
					debug_printf_parse("%s: TC_LENGTH\n", __func__);
					/* "length" may be used without parentheses: peek at
					 * the next token and parse arguments only if it is '(' */
					next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
					rollback_token();
					if (t_tclass & TC_SEQSTART) {
						/* the rolled-back '(' is consumed by condition() */
						cn->l.n = condition();
					}
					break;
				}
			}
		}
	}

	debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
	return sn.r.n;
}
1405
1406
1407static node *chain_node(uint32_t info)
1408{
1409 node *n;
1410
1411 if (!seq->first)
1412 seq->first = seq->last = new_node(0);
1413
1414 if (seq->programname != g_progname) {
1415 seq->programname = g_progname;
1416 n = chain_node(OC_NEWSOURCE);
1417 n->l.new_progname = xstrdup(g_progname);
1418 }
1419
1420 n = seq->last;
1421 n->info = info;
1422 seq->last = n->a.n = new_node(OC_DONE);
1423
1424 return n;
1425}
1426
1427static void chain_expr(uint32_t info)
1428{
1429 node *n;
1430
1431 n = chain_node(info);
1432
1433 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1434 if ((info & OF_REQUIRED) && !n->l.n)
1435 syntax_error(EMSG_TOO_FEW_ARGS);
1436
1437 if (t_tclass & TC_GRPTERM)
1438 rollback_token();
1439}
1440
1441static node *chain_loop(node *nn)
1442{
1443 node *n, *n2, *save_brk, *save_cont;
1444
1445 save_brk = break_ptr;
1446 save_cont = continue_ptr;
1447
1448 n = chain_node(OC_BR | Vx);
1449 continue_ptr = new_node(OC_EXEC);
1450 break_ptr = new_node(OC_EXEC);
1451 chain_group();
1452 n2 = chain_node(OC_EXEC | Vx);
1453 n2->l.n = nn;
1454 n2->a.n = n;
1455 continue_ptr->a.n = n2;
1456 break_ptr->a.n = n->r.n = seq->last;
1457
1458 continue_ptr = save_cont;
1459 break_ptr = save_brk;
1460
1461 return n;
1462}
1463
1464
1465static void chain_group(void)
1466{
1467 uint32_t c;
1468 node *n, *n2, *n3;
1469
1470 do {
1471 c = next_token(TC_GRPSEQ);
1472 } while (c & TC_NEWLINE);
1473
1474 if (c & TC_GRPSTART) {
1475 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1476 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1477 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1478 if (t_tclass & TC_NEWLINE)
1479 continue;
1480 rollback_token();
1481 chain_group();
1482 }
1483 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1484 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1485 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1486 rollback_token();
1487 chain_expr(OC_EXEC | Vx);
1488 } else {
1489
1490 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1491 switch (t_info & OPCLSMASK) {
1492 case ST_IF:
1493 debug_printf_parse("%s: ST_IF\n", __func__);
1494 n = chain_node(OC_BR | Vx);
1495 n->l.n = condition();
1496 chain_group();
1497 n2 = chain_node(OC_EXEC);
1498 n->r.n = seq->last;
1499 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1500 chain_group();
1501 n2->a.n = seq->last;
1502 } else {
1503 rollback_token();
1504 }
1505 break;
1506
1507 case ST_WHILE:
1508 debug_printf_parse("%s: ST_WHILE\n", __func__);
1509 n2 = condition();
1510 n = chain_loop(NULL);
1511 n->l.n = n2;
1512 break;
1513
1514 case ST_DO:
1515 debug_printf_parse("%s: ST_DO\n", __func__);
1516 n2 = chain_node(OC_EXEC);
1517 n = chain_loop(NULL);
1518 n2->a.n = n->a.n;
1519 next_token(TC_WHILE);
1520 n->l.n = condition();
1521 break;
1522
1523 case ST_FOR:
1524 debug_printf_parse("%s: ST_FOR\n", __func__);
1525 next_token(TC_SEQSTART);
1526 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1527 if (t_tclass & TC_SEQTERM) {
1528 if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
1529 syntax_error(EMSG_UNEXP_TOKEN);
1530 n = chain_node(OC_WALKINIT | VV);
1531 n->l.n = n2->l.n;
1532 n->r.n = n2->r.n;
1533 n = chain_loop(NULL);
1534 n->info = OC_WALKNEXT | Vx;
1535 n->l.n = n2->l.n;
1536 } else {
1537 n = chain_node(OC_EXEC | Vx);
1538 n->l.n = n2;
1539 n2 = parse_expr(TC_SEMICOL);
1540 n3 = parse_expr(TC_SEQTERM);
1541 n = chain_loop(n3);
1542 n->l.n = n2;
1543 if (!n2)
1544 n->info = OC_EXEC;
1545 }
1546 break;
1547
1548 case OC_PRINT:
1549 case OC_PRINTF:
1550 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1551 n = chain_node(t_info);
1552 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1553 if (t_tclass & TC_OUTRDR) {
1554 n->info |= t_info;
1555 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1556 }
1557 if (t_tclass & TC_GRPTERM)
1558 rollback_token();
1559 break;
1560
1561 case OC_BREAK:
1562 debug_printf_parse("%s: OC_BREAK\n", __func__);
1563 n = chain_node(OC_EXEC);
1564 n->a.n = break_ptr;
1565 chain_expr(t_info);
1566 break;
1567
1568 case OC_CONTINUE:
1569 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1570 n = chain_node(OC_EXEC);
1571 n->a.n = continue_ptr;
1572 chain_expr(t_info);
1573 break;
1574
1575
1576 default:
1577 debug_printf_parse("%s: default\n", __func__);
1578 chain_expr(t_info);
1579 }
1580 }
1581}
1582
1583static void parse_program(char *p)
1584{
1585 uint32_t tclass;
1586 node *cn;
1587 func *f;
1588 var *v;
1589
1590 g_pos = p;
1591 t_lineno = 1;
1592 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1593 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1594
1595 if (tclass & TC_OPTERM) {
1596 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1597 continue;
1598 }
1599
1600 seq = &mainseq;
1601 if (tclass & TC_BEGIN) {
1602 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1603 seq = &beginseq;
1604 chain_group();
1605 } else if (tclass & TC_END) {
1606 debug_printf_parse("%s: TC_END\n", __func__);
1607 seq = &endseq;
1608 chain_group();
1609 } else if (tclass & TC_FUNCDECL) {
1610 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1611 next_token(TC_FUNCTION);
1612 g_pos++;
1613 f = newfunc(t_string);
1614 f->body.first = NULL;
1615 f->nargs = 0;
1616
1617 while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
1618
1619
1620 if (f->nargs == 0 && t_tclass == TC_SEQTERM)
1621 break;
1622
1623
1624 if (t_tclass != TC_VARIABLE)
1625 syntax_error(EMSG_UNEXP_TOKEN);
1626
1627 v = findvar(ahash, t_string);
1628 v->x.aidx = f->nargs++;
1629
1630
1631 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1632 break;
1633 if (t_tclass != TC_COMMA)
1634 syntax_error(EMSG_UNEXP_TOKEN);
1635 }
1636 seq = &f->body;
1637 chain_group();
1638 clear_array(ahash);
1639 } else if (tclass & TC_OPSEQ) {
1640 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1641 rollback_token();
1642 cn = chain_node(OC_TEST);
1643 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1644 if (t_tclass & TC_GRPSTART) {
1645 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1646 rollback_token();
1647 chain_group();
1648 } else {
1649 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1650 chain_node(OC_PRINT);
1651 }
1652 cn->r.n = mainseq.last;
1653 } else {
1654 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1655 rollback_token();
1656 chain_group();
1657 }
1658 }
1659 debug_printf_parse("%s: TC_EOF\n", __func__);
1660}
1661
1662
1663
1664
1665static node *mk_splitter(const char *s, tsplitter *spl)
1666{
1667 regex_t *re, *ire;
1668 node *n;
1669
1670 re = &spl->re[0];
1671 ire = &spl->re[1];
1672 n = &spl->n;
1673 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1674 regfree(re);
1675 regfree(ire);
1676 }
1677 if (s[0] && s[1]) {
1678 mk_re_node(s, n, re);
1679 } else {
1680 n->info = (uint32_t) s[0];
1681 }
1682
1683 return n;
1684}
1685
1686
1687
1688
1689
1690static regex_t *as_regex(node *op, regex_t *preg)
1691{
1692 int cflags;
1693 var *v;
1694 const char *s;
1695
1696 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1697 return icase ? op->r.ire : op->l.re;
1698 }
1699 v = nvalloc(1);
1700 s = getvar_s(evaluate(op, v));
1701
1702 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1703
1704
1705
1706
1707
1708 if (regcomp(preg, s, cflags)) {
1709 cflags &= ~REG_EXTENDED;
1710 xregcomp(preg, s, cflags);
1711 }
1712 nvfree(v);
1713 return preg;
1714}
1715
1716
1717
1718
1719
/*
 * Make sure buffer 'b' can hold more than 'n' bytes.
 * A NULL buffer, or one whose recorded *size is not greater than n, is
 * reallocated to n + n/2 + 80 bytes and *size is updated; otherwise the
 * buffer is returned untouched. Returns the (possibly moved) buffer.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1728
1729
1730static void fsrealloc(int size)
1731{
1732 int i;
1733
1734 if (size >= maxfields) {
1735 i = maxfields;
1736 maxfields = size + 16;
1737 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1738 for (; i < maxfields; i++) {
1739 Fields[i].type = VF_SPECIAL;
1740 Fields[i].string = NULL;
1741 }
1742 }
1743
1744 for (i = size; i < nfields; i++) {
1745 clrvar(Fields + i);
1746 }
1747 nfields = size;
1748}
1749
/*
 * Split string 's' into pieces according to splitter node 'spl'.
 *
 * A zeroed buffer of strlen(s)*2+3 bytes is allocated and stored in
 * *slist; the pieces are written into it one after another, each
 * NUL-terminated. The caller owns the buffer.
 * Returns the number of pieces.
 *
 * Modes, chosen from spl->info:
 *  - compiled regexp (OC_REGEXP class): split on regexp matches;
 *  - separator character 0: every character is its own piece;
 *  - separator other than ' ': split on that single character
 *    (on both of its cases when icase is set);
 *  - separator ' ': split on runs of whitespace, ignoring leading and
 *    trailing whitespace.
 */
static int awk_split(const char *s, node *spl, char **slist)
{
	int l, n;
	char c[4];
	char *s1;
	regmatch_t pmatch[2];

	/* in the worst case each char is a separate piece */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c[0..1]: separator char (twice, for the two cases); c+2: "\n" when
	 * RS is an empty string, "" otherwise */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';

	n = 0;
	if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
		if (!*s)
			return n; /* "": zero pieces */
		n++; /* at least one piece will be there */
		do {
			/* l: distance to the next char of c+2, or to the end of s */
			l = strcspn(s, c+2);
			if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
			 && pmatch[0].rm_so <= l
			) {
				/* separator found before that point */
				l = pmatch[0].rm_so;
				if (pmatch[0].rm_eo == 0) {
					/* empty match at the very start: consume one char
					 * so that the loop makes progress */
					l++;
					pmatch[0].rm_eo++;
				}
				n++; /* one more piece follows the separator */
			} else {
				/* no usable match: the piece ends at l; also skip the
				 * char found there, if any */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			memcpy(s1, s, l);
			/* overwrite the separator (at least one byte) with NULs */
			do {
				s1[l] = '\0';
			} while (++l < pmatch[0].rm_eo);
			/* NOTE(review): nextword() presumably advances s1 past the
			 * piece just written and its terminator - confirm */
			nextword(&s1);
			s += pmatch[0].rm_eo;
		} while (*s);
		return n;
	}
	if (c[0] == '\0') { /* null split: one piece per character */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') { /* single-character split */
		if (icase) {
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		if (*s1)
			n++;
		/* c is used as a NUL-terminated set here: c[0], c[1], and '\n'
		 * too when c[2] was set above */
		while ((s1 = strpbrk(s1, c)) != NULL) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* space split: pieces are separated by runs of whitespace */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s)
			break;
		n++;
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}
1830
1831static void split_f0(void)
1832{
1833
1834#define fstrings (G.split_f0__fstrings)
1835
1836 int i, n;
1837 char *s;
1838
1839 if (is_f0_split)
1840 return;
1841
1842 is_f0_split = TRUE;
1843 free(fstrings);
1844 fsrealloc(0);
1845 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1846 fsrealloc(n);
1847 s = fstrings;
1848 for (i = 0; i < n; i++) {
1849 Fields[i].string = nextword(&s);
1850 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1851 }
1852
1853
1854 clrvar(intvar[NF]);
1855 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1856 intvar[NF]->number = nfields;
1857#undef fstrings
1858}
1859
1860
1861static void handle_special(var *v)
1862{
1863 int n;
1864 char *b;
1865 const char *sep, *s;
1866 int sl, l, len, i, bsize;
1867
1868 if (!(v->type & VF_SPECIAL))
1869 return;
1870
1871 if (v == intvar[NF]) {
1872 n = (int)getvar_i(v);
1873 if (n < 0)
1874 syntax_error("NF set to negative value");
1875 fsrealloc(n);
1876
1877
1878 sep = getvar_s(intvar[OFS]);
1879 sl = strlen(sep);
1880 b = NULL;
1881 len = 0;
1882 for (i = 0; i < n; i++) {
1883 s = getvar_s(&Fields[i]);
1884 l = strlen(s);
1885 if (b) {
1886 memcpy(b+len, sep, sl);
1887 len += sl;
1888 }
1889 b = qrealloc(b, len+l+sl, &bsize);
1890 memcpy(b+len, s, l);
1891 len += l;
1892 }
1893 if (b)
1894 b[len] = '\0';
1895 setvar_p(intvar[F0], b);
1896 is_f0_split = TRUE;
1897
1898 } else if (v == intvar[F0]) {
1899 is_f0_split = FALSE;
1900
1901 } else if (v == intvar[FS]) {
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912 split_f0();
1913
1914 mk_splitter(getvar_s(v), &fsplitter);
1915 } else if (v == intvar[RS]) {
1916 mk_splitter(getvar_s(v), &rsplitter);
1917 } else if (v == intvar[IGNORECASE]) {
1918 icase = istrue(v);
1919 } else {
1920 n = getvar_i(intvar[NF]);
1921 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1922
1923 }
1924}
1925
1926
1927static node *nextarg(node **pn)
1928{
1929 node *n;
1930
1931 n = *pn;
1932 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1933 *pn = n->r.n;
1934 n = n->l.n;
1935 } else {
1936 *pn = NULL;
1937 }
1938 return n;
1939}
1940
1941static void hashwalk_init(var *v, xhash *array)
1942{
1943 hash_item *hi;
1944 unsigned i;
1945 walker_list *w;
1946 walker_list *prev_walker;
1947
1948 if (v->type & VF_WALK) {
1949 prev_walker = v->x.walker;
1950 } else {
1951 v->type |= VF_WALK;
1952 prev_walker = NULL;
1953 }
1954 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1955
1956 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1);
1957 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1958 w->cur = w->end = w->wbuf;
1959 w->prev = prev_walker;
1960 for (i = 0; i < array->csize; i++) {
1961 hi = array->items[i];
1962 while (hi) {
1963 strcpy(w->end, hi->name);
1964 nextword(&w->end);
1965 hi = hi->next;
1966 }
1967 }
1968}
1969
1970static int hashwalk_next(var *v)
1971{
1972 walker_list *w = v->x.walker;
1973
1974 if (w->cur >= w->end) {
1975 walker_list *prev_walker = w->prev;
1976
1977 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1978 free(w);
1979 v->x.walker = prev_walker;
1980 return FALSE;
1981 }
1982
1983 setvar_s(v, nextword(&w->cur));
1984 return TRUE;
1985}
1986
1987
1988static int ptest(node *pattern)
1989{
1990
1991 return istrue(evaluate(pattern, &G.ptest__v));
1992}
1993
1994
/*
 * Read one record from stream 'rsm' into variable 'v'; the text of the
 * separator that ended it is stored in RT.
 *
 * Returns 1 when a record was read, 0 at end of input, -1 on a read error
 * (ERRNO is set from errno in that case).
 *
 * Buffer state kept in rsm between calls:
 *   buffer - data buffer, size - its allocated size,
 *   adv    - offset of the first unconsumed byte,
 *   pos    - number of unconsumed bytes starting at that offset.
 */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[2];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	debug_printf_eval("entered %s()\n", __func__);

	/* we're using our own buffer since we need access to accumulating
	 * characters
	 */
	fd = fileno(rsm->F);
	m = rsm->buffer;
	a = rsm->adv;
	p = rsm->pos;
	size = rsm->size;
	c = (char) rsplitter.n.info; /* single-character record separator */
	rp = 0; /* offset of the record start within the unconsumed data */

	if (!m)
		m = qrealloc(m, 256, &size);

	do {
		b = m + a;
		/* so/eo: start/end offset of the separator; by default the
		 * record takes all buffered data and the separator is empty */
		so = eo = p;
		r = 1;
		if (p > 0) {
			if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
				/* regexp separator */
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* a match touching the end of the data may continue
					 * in data not read yet: accept it only otherwise */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* single-character separator; search only the part not
				 * scanned before (from pp). A NUL byte inside the data
				 * also ends the record */
				s = strchr(b+pp, c);
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* separator char is 0: records are separated by blank
				 * lines; leading newlines are skipped */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					/* as above: the run of newlines may continue */
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* no complete record yet: move the unconsumed data (and its
		 * terminating NUL) to the buffer start ... */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		/* ... make room and read more */
		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* safe_read() returned a negative value: read error */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp); /* until nothing more could be read */

	if (p == 0) {
		/* no data: r becomes 0 at end of input, -1 after a read error */
		r--;
	} else {
		/* temporarily NUL-terminate the record, then the separator */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* save the buffer state: everything up to eo is consumed */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	debug_printf_eval("returning from %s(): %d\n", __func__, r);

	return r;
}
2095
2096static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2097{
2098 int r = 0;
2099 char c;
2100 const char *s = format;
2101
2102 if (int_as_int && n == (long long)n) {
2103 r = snprintf(b, size, "%lld", (long long)n);
2104 } else {
2105 do { c = *s; } while (c && *++s);
2106 if (strchr("diouxX", c)) {
2107 r = snprintf(b, size, format, (int)n);
2108 } else if (strchr("eEfgG", c)) {
2109 r = snprintf(b, size, format, n);
2110 } else {
2111 syntax_error(EMSG_INV_FMT);
2112 }
2113 }
2114 return r;
2115}
2116
2117
2118static char *awk_printf(node *n)
2119{
2120 char *b = NULL;
2121 char *fmt, *s, *f;
2122 const char *s1;
2123 int i, j, incr, bsize;
2124 char c, c1;
2125 var *v, *arg;
2126
2127 v = nvalloc(1);
2128 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2129
2130 i = 0;
2131 while (*f) {
2132 s = f;
2133 while (*f && (*f != '%' || *++f == '%'))
2134 f++;
2135 while (*f && !isalpha(*f)) {
2136 if (*f == '*')
2137 syntax_error("%*x formats are not supported");
2138 f++;
2139 }
2140
2141 incr = (f - s) + MAXVARFMT;
2142 b = qrealloc(b, incr + i, &bsize);
2143 c = *f;
2144 if (c != '\0')
2145 f++;
2146 c1 = *f;
2147 *f = '\0';
2148 arg = evaluate(nextarg(&n), v);
2149
2150 j = i;
2151 if (c == 'c' || !c) {
2152 i += sprintf(b+i, s, is_numeric(arg) ?
2153 (char)getvar_i(arg) : *getvar_s(arg));
2154 } else if (c == 's') {
2155 s1 = getvar_s(arg);
2156 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2157 i += sprintf(b+i, s, s1);
2158 } else {
2159 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2160 }
2161 *f = c1;
2162
2163
2164 if (i < j)
2165 i = j;
2166 }
2167
2168 free(fmt);
2169 nvfree(v);
2170 b = xrealloc(b, i + 1);
2171 b[i] = '\0';
2172 return b;
2173}
2174
2175
2176
2177
2178
2179
2180
2181
/*
 * Regexp substitution shared by the substitution builtins.
 *
 *  rn     - node giving the regexp (see as_regex())
 *  repl   - replacement text; '&' inserts the matched text
 *  nm     - which match to replace: with nm >= 1 only the nm-th match is
 *           replaced and scanning stops there; with nm == 0 every match
 *           is replaced
 *  src    - variable holding the input, NULL for $0
 *  dest   - variable receiving the result, NULL for $0
 *  subexp - non-zero: "\N" (N = 0..9) in repl inserts subexpression N
 *
 * Returns the number of matches found.
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL;
	residx = 0;
	match_no = 0;
	regexec_flags = 0;
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]);
	replen = strlen(repl);
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		/* Copy everything up to the end of the match first; if this
		 * match gets replaced, the matched part is overwritten below */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs;

			/* replace: step back over the matched text just copied */
			residx -= (eo - so);
			nbs = 0; /* number of backslashes directly before *s */
			for (s = repl; *s; s++) {
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* drop c itself and (nbs + 1) / 2 of the
					 * backslashes copied before it */
					residx -= ((nbs + 3) >> 1);
					j = 0;
					if (c != '&') {
						j = c - '0';
						/* shifts the parity: a digit needs an odd
						 * number of backslashes to be a reference */
						nbs++;
					}
					if (nbs % 2) {
						/* escaped: emit c literally */
						resbuf[residx++] = c;
					} else {
						/* insert match (j == 0) or subexpression j */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		/* the rest of the input no longer starts at a line beginning */
		regexec_flags = REG_NOTBOL;
		sp += eo;
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match (e.g. "b*" will match anywhere).
			 * We would loop forever at the same position, so copy one
			 * input character through and continue after it. */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret; /* end of input; resbuf is terminated */
			sp++;
			residx++;
		}
	}

	/* append the unmatched tail */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	/* resbuf is passed on to the destination variable */
	setvar_p(dest ? dest : intvar[F0], resbuf);
	/* free the regexp only if as_regex() compiled it into sreg */
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}
2272
2273static NOINLINE int do_mktime(const char *ds)
2274{
2275 struct tm then;
2276 int count;
2277
2278
2279 then.tm_isdst = -1;
2280
2281
2282
2283 count = sscanf(ds, "%u %u %u %u %u %u %d",
2284 &then.tm_year, &then.tm_mon, &then.tm_mday,
2285 &then.tm_hour, &then.tm_min, &then.tm_sec,
2286 &then.tm_isdst);
2287
2288 if (count < 6
2289 || (unsigned)then.tm_mon < 1
2290 || (unsigned)then.tm_year < 1900
2291 ) {
2292 return -1;
2293 }
2294
2295 then.tm_mon -= 1;
2296 then.tm_year -= 1900;
2297
2298 return mktime(&then);
2299}
2300
/*
 * Execute the builtin function described by node 'op' and store its
 * result in 'res', which is also returned.
 *
 * Up to four arguments are collected. Flag bits in op->info, shifted by
 * one position per argument, select whether an argument is evaluated
 * (av[i]) and whether its string value is fetched (as[i]); the unevaluated
 * argument nodes are kept in an[i]. The top two bits of op->info hold the
 * minimum number of arguments.
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
/* splitter rebuilt on each use when the separator is not a regexp literal */
#define tspl (G.exec_builtin__tspl)

	var *tv;
	node *an[4];
	var *av[4];
	const char *as[4];
	regmatch_t pmatch[2];
	regex_t sreg, *re;
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	tv = nvalloc(4);
	isr = info = op->info;
	op = op->l.n;

	av[2] = av[3] = NULL;
	for (i = 0; i < 4 && op; i++) {
		an[i] = nextarg(&op);
		if (isr & 0x09000000)
			av[i] = evaluate(an[i], &tv[i]);
		if (isr & 0x08000000)
			as[i] = getvar_s(av[i]);
		/* move the next argument's flag bits into position */
		isr >>= 1;
	}

	nargs = i;
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	/* atan2(y, x); needs math library support */
	case B_a2:
		if (ENABLE_FEATURE_AWK_LIBM)
			setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
		else
			syntax_error(EMSG_NO_MATH);
		break;

	/* split string as[0] into array av[1]; result is the piece count */
	case B_sp: {
		char *s, *s1;

		if (nargs > 2) {
			/* explicit separator: a regexp node is used as is, anything
			 * else is evaluated and turned into a splitter */
			spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
				an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s;
		clear_array(iamarray(av[1]));
		/* array elements are numbered from 1 */
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	/* substring of as[0]: 1-based start av[1], optional length av[2] */
	case B_ss: {
		char *s;

		l = strlen(as[0]);
		i = getvar_i(av[1]) - 1;
		/* clamp the start into [0, l] */
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
	 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	/* case conversion of a copy of as[0]; ASCII letters only */
	case B_lo:
	case B_up: {
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			/* is it an ASCII letter? (bit 0x20 is the case bit) */
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	/* 1-based position of as[1] within as[0], 0 if not found */
	case B_ix:
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll;
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* case-insensitive search: compare at each position
				 * where the needle still fits */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	/* format a time value (second argument, or now) with strftime() */
	case B_ti:
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		/* default format when called without arguments */
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	case B_mt:
		setvar_i(res, do_mktime(as[0]));
		break;

	/* regexp match of as[0]; sets RSTART and RLENGTH */
	case B_ma:
		re = as_regex(an[1], &sreg);
		n = regexec(re, as[0], 1, pmatch, 0);
		if (n == 0) {
			/* convert to 1-based offsets */
			pmatch[0].rm_so++;
			pmatch[0].rm_eo++;
		} else {
			/* no match: RSTART = 0, RLENGTH = -1 */
			pmatch[0].rm_so = 0;
			pmatch[0].rm_eo = -1;
		}
		setvar_i(newvar("RSTART"), pmatch[0].rm_so);
		setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
		setvar_i(res, pmatch[0].rm_so);
		/* free the regexp only if as_regex() compiled it into sreg */
		if (re == &sreg)
			regfree(re);
		break;

	/* substitution with subexpression references enabled; the third
	 * argument selects the match, the result string goes into res */
	case B_ge:
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	/* replace every match in place in av[2] ($0 when absent); the result
	 * is the number of matches */
	case B_gs:
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	/* same, but only the first match is replaced */
	case B_su:
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tv);
	return res;
#undef tspl
}
2497
2498
2499
2500
2501
/* Shift an opcode-class value right by 8 bits; applied both to the switch
 * selector (opinfo & OPCLSMASK) and to the case labels in evaluate() */
#define XC(n) ((n) >> 8)
2503
2504static var *evaluate(node *op, var *res)
2505{
2506
2507#define fnargs (G.evaluate__fnargs)
2508
2509#define seed (G.evaluate__seed)
2510#define sreg (G.evaluate__sreg)
2511
2512 var *v1;
2513
2514 if (!op)
2515 return setvar_s(res, NULL);
2516
2517 debug_printf_eval("entered %s()\n", __func__);
2518
2519 v1 = nvalloc(2);
2520
2521 while (op) {
2522 struct {
2523 var *v;
2524 const char *s;
2525 } L = L;
2526 struct {
2527 var *v;
2528 const char *s;
2529 } R = R;
2530 double L_d = L_d;
2531 uint32_t opinfo;
2532 int opn;
2533 node *op1;
2534
2535 opinfo = op->info;
2536 opn = (opinfo & OPNMASK);
2537 g_lineno = op->lineno;
2538 op1 = op->l.n;
2539 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2540
2541
2542
2543
2544
2545 if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
2546 uint32_t info = op1->info & OPCLSMASK;
2547 var *v;
2548
2549 debug_printf_eval("DELETE\n");
2550 if (info == OC_VAR) {
2551 v = op1->l.v;
2552 } else if (info == OC_FNARG) {
2553 v = &fnargs[op1->l.aidx];
2554 } else {
2555 syntax_error(EMSG_NOT_ARRAY);
2556 }
2557 if (op1->r.n) {
2558 const char *s;
2559 s = getvar_s(evaluate(op1->r.n, v1));
2560 hash_remove(iamarray(v), s);
2561 } else {
2562 clear_array(iamarray(v));
2563 }
2564 goto next;
2565 }
2566
2567
2568 if (opinfo & OF_RES1)
2569 L.v = evaluate(op1, v1);
2570 if (opinfo & OF_RES2)
2571 R.v = evaluate(op->r.n, v1+1);
2572 if (opinfo & OF_STR1) {
2573 L.s = getvar_s(L.v);
2574 debug_printf_eval("L.s:'%s'\n", L.s);
2575 }
2576 if (opinfo & OF_STR2) {
2577 R.s = getvar_s(R.v);
2578 debug_printf_eval("R.s:'%s'\n", R.s);
2579 }
2580 if (opinfo & OF_NUM1) {
2581 L_d = getvar_i(L.v);
2582 debug_printf_eval("L_d:%f\n", L_d);
2583 }
2584
2585 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2586 switch (XC(opinfo & OPCLSMASK)) {
2587
2588
2589
2590
2591 case XC( OC_TEST ):
2592 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2593
2594 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2595 op->info |= OF_CHECKED;
2596 if (ptest(op1->r.n))
2597 op->info &= ~OF_CHECKED;
2598 op = op->a.n;
2599 } else {
2600 op = op->r.n;
2601 }
2602 } else {
2603 op = ptest(op1) ? op->a.n : op->r.n;
2604 }
2605 break;
2606
2607
2608 case XC( OC_EXEC ):
2609 break;
2610
2611
2612 case XC( OC_BR ):
2613 op = istrue(L.v) ? op->a.n : op->r.n;
2614 break;
2615
2616
2617 case XC( OC_WALKINIT ):
2618 hashwalk_init(L.v, iamarray(R.v));
2619 break;
2620
2621
2622 case XC( OC_WALKNEXT ):
2623 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2624 break;
2625
2626 case XC( OC_PRINT ):
2627 case XC( OC_PRINTF ): {
2628 FILE *F = stdout;
2629
2630 if (op->r.n) {
2631 rstream *rsm = newfile(R.s);
2632 if (!rsm->F) {
2633 if (opn == '|') {
2634 rsm->F = popen(R.s, "w");
2635 if (rsm->F == NULL)
2636 bb_perror_msg_and_die("popen");
2637 rsm->is_pipe = 1;
2638 } else {
2639 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2640 }
2641 }
2642 F = rsm->F;
2643 }
2644
2645 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2646 if (!op1) {
2647 fputs(getvar_s(intvar[F0]), F);
2648 } else {
2649 while (op1) {
2650 var *v = evaluate(nextarg(&op1), v1);
2651 if (v->type & VF_NUMBER) {
2652 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2653 getvar_i(v), TRUE);
2654 fputs(g_buf, F);
2655 } else {
2656 fputs(getvar_s(v), F);
2657 }
2658
2659 if (op1)
2660 fputs(getvar_s(intvar[OFS]), F);
2661 }
2662 }
2663 fputs(getvar_s(intvar[ORS]), F);
2664
2665 } else {
2666 char *s = awk_printf(op1);
2667 fputs(s, F);
2668 free(s);
2669 }
2670 fflush(F);
2671 break;
2672 }
2673
2674
2675
2676 case XC( OC_NEWSOURCE ):
2677 g_progname = op->l.new_progname;
2678 break;
2679
2680 case XC( OC_RETURN ):
2681 copyvar(res, L.v);
2682 break;
2683
2684 case XC( OC_NEXTFILE ):
2685 nextfile = TRUE;
2686 case XC( OC_NEXT ):
2687 nextrec = TRUE;
2688 case XC( OC_DONE ):
2689 clrvar(res);
2690 break;
2691
2692 case XC( OC_EXIT ):
2693 awk_exit(L_d);
2694
2695
2696
2697 case XC( OC_VAR ):
2698 debug_printf_eval("VAR\n");
2699 L.v = op->l.v;
2700 if (L.v == intvar[NF])
2701 split_f0();
2702 goto v_cont;
2703
2704 case XC( OC_FNARG ):
2705 debug_printf_eval("FNARG[%d]\n", op->l.aidx);
2706 L.v = &fnargs[op->l.aidx];
2707 v_cont:
2708 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2709 break;
2710
2711 case XC( OC_IN ):
2712 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2713 break;
2714
2715 case XC( OC_REGEXP ):
2716 op1 = op;
2717 L.s = getvar_s(intvar[F0]);
2718 goto re_cont;
2719
2720 case XC( OC_MATCH ):
2721 op1 = op->r.n;
2722 re_cont:
2723 {
2724 regex_t *re = as_regex(op1, &sreg);
2725 int i = regexec(re, L.s, 0, NULL, 0);
2726 if (re == &sreg)
2727 regfree(re);
2728 setvar_i(res, (i == 0) ^ (opn == '!'));
2729 }
2730 break;
2731
2732 case XC( OC_MOVE ):
2733 debug_printf_eval("MOVE\n");
2734
2735
2736
2737
2738
2739
2740
2741 res = copyvar(L.v, R.v);
2742
2743 break;
2744
2745 case XC( OC_TERNARY ):
2746 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2747 syntax_error(EMSG_POSSIBLE_ERROR);
2748 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2749 break;
2750
2751 case XC( OC_FUNC ): {
2752 var *vbeg, *v;
2753 const char *sv_progname;
2754
2755
2756 if (!op->r.n->info && !op->r.f->body.first)
2757 syntax_error(EMSG_UNDEF_FUNC);
2758
2759 vbeg = v = nvalloc(op->r.f->nargs + 1);
2760 while (op1) {
2761 var *arg = evaluate(nextarg(&op1), v1);
2762 copyvar(v, arg);
2763 v->type |= VF_CHILD;
2764 v->x.parent = arg;
2765 if (++v - vbeg >= op->r.f->nargs)
2766 break;
2767 }
2768
2769 v = fnargs;
2770 fnargs = vbeg;
2771 sv_progname = g_progname;
2772
2773 res = evaluate(op->r.f->body.first, res);
2774
2775 g_progname = sv_progname;
2776 nvfree(fnargs);
2777 fnargs = v;
2778
2779 break;
2780 }
2781
2782 case XC( OC_GETLINE ):
2783 case XC( OC_PGETLINE ): {
2784 rstream *rsm;
2785 int i;
2786
2787 if (op1) {
2788 rsm = newfile(L.s);
2789 if (!rsm->F) {
2790 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2791 rsm->F = popen(L.s, "r");
2792 rsm->is_pipe = TRUE;
2793 } else {
2794 rsm->F = fopen_for_read(L.s);
2795 }
2796 }
2797 } else {
2798 if (!iF)
2799 iF = next_input_file();
2800 rsm = iF;
2801 }
2802
2803 if (!rsm || !rsm->F) {
2804 setvar_i(intvar[ERRNO], errno);
2805 setvar_i(res, -1);
2806 break;
2807 }
2808
2809 if (!op->r.n)
2810 R.v = intvar[F0];
2811
2812 i = awk_getline(rsm, R.v);
2813 if (i > 0 && !op1) {
2814 incvar(intvar[FNR]);
2815 incvar(intvar[NR]);
2816 }
2817 setvar_i(res, i);
2818 break;
2819 }
2820
2821
2822 case XC( OC_FBLTIN ): {
2823 double R_d = R_d;
2824
2825 switch (opn) {
2826 case F_in:
2827 R_d = (long long)L_d;
2828 break;
2829
2830 case F_rn:
2831 R_d = (double)rand() / (double)RAND_MAX;
2832 break;
2833
2834 case F_co:
2835 if (ENABLE_FEATURE_AWK_LIBM) {
2836 R_d = cos(L_d);
2837 break;
2838 }
2839
2840 case F_ex:
2841 if (ENABLE_FEATURE_AWK_LIBM) {
2842 R_d = exp(L_d);
2843 break;
2844 }
2845
2846 case F_lg:
2847 if (ENABLE_FEATURE_AWK_LIBM) {
2848 R_d = log(L_d);
2849 break;
2850 }
2851
2852 case F_si:
2853 if (ENABLE_FEATURE_AWK_LIBM) {
2854 R_d = sin(L_d);
2855 break;
2856 }
2857
2858 case F_sq:
2859 if (ENABLE_FEATURE_AWK_LIBM) {
2860 R_d = sqrt(L_d);
2861 break;
2862 }
2863
2864 syntax_error(EMSG_NO_MATH);
2865 break;
2866
2867 case F_sr:
2868 R_d = (double)seed;
2869 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2870 srand(seed);
2871 break;
2872
2873 case F_ti:
2874 R_d = time(NULL);
2875 break;
2876
2877 case F_le:
2878 debug_printf_eval("length: L.s:'%s'\n", L.s);
2879 if (!op1) {
2880 L.s = getvar_s(intvar[F0]);
2881 debug_printf_eval("length: L.s='%s'\n", L.s);
2882 }
2883 else if (L.v->type & VF_ARRAY) {
2884 R_d = L.v->x.array->nel;
2885 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2886 break;
2887 }
2888 R_d = strlen(L.s);
2889 break;
2890
2891 case F_sy:
2892 fflush_all();
2893 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2894 ? (system(L.s) >> 8) : 0;
2895 break;
2896
2897 case F_ff:
2898 if (!op1) {
2899 fflush(stdout);
2900 } else if (L.s && *L.s) {
2901 rstream *rsm = newfile(L.s);
2902 fflush(rsm->F);
2903 } else {
2904 fflush_all();
2905 }
2906 break;
2907
2908 case F_cl: {
2909 rstream *rsm;
2910 int err = 0;
2911 rsm = (rstream *)hash_search(fdhash, L.s);
2912 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2913 if (rsm) {
2914 debug_printf_eval("OC_FBLTIN F_cl "
2915 "rsm->is_pipe:%d, ->F:%p\n",
2916 rsm->is_pipe, rsm->F);
2917
2918
2919
2920
2921 if (rsm->F)
2922 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2923 free(rsm->buffer);
2924 hash_remove(fdhash, L.s);
2925 }
2926 if (err)
2927 setvar_i(intvar[ERRNO], errno);
2928 R_d = (double)err;
2929 break;
2930 }
2931 }
2932 setvar_i(res, R_d);
2933 break;
2934 }
2935
2936 case XC( OC_BUILTIN ):
2937 res = exec_builtin(op, res);
2938 break;
2939
2940 case XC( OC_SPRINTF ):
2941 setvar_p(res, awk_printf(op1));
2942 break;
2943
2944 case XC( OC_UNARY ): {
2945 double Ld, R_d;
2946
2947 Ld = R_d = getvar_i(R.v);
2948 switch (opn) {
2949 case 'P':
2950 Ld = ++R_d;
2951 goto r_op_change;
2952 case 'p':
2953 R_d++;
2954 goto r_op_change;
2955 case 'M':
2956 Ld = --R_d;
2957 goto r_op_change;
2958 case 'm':
2959 R_d--;
2960 r_op_change:
2961 setvar_i(R.v, R_d);
2962 break;
2963 case '!':
2964 Ld = !istrue(R.v);
2965 break;
2966 case '-':
2967 Ld = -R_d;
2968 break;
2969 }
2970 setvar_i(res, Ld);
2971 break;
2972 }
2973
2974 case XC( OC_FIELD ): {
2975 int i = (int)getvar_i(R.v);
2976 if (i < 0)
2977 syntax_error(EMSG_NEGATIVE_FIELD);
2978 if (i == 0) {
2979 res = intvar[F0];
2980 } else {
2981 split_f0();
2982 if (i > nfields)
2983 fsrealloc(i);
2984 res = &Fields[i - 1];
2985 }
2986 break;
2987 }
2988
2989
2990 case XC( OC_CONCAT ):
2991 case XC( OC_COMMA ): {
2992 const char *sep = "";
2993 if ((opinfo & OPCLSMASK) == OC_COMMA)
2994 sep = getvar_s(intvar[SUBSEP]);
2995 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2996 break;
2997 }
2998
2999 case XC( OC_LAND ):
3000 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3001 break;
3002
3003 case XC( OC_LOR ):
3004 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3005 break;
3006
3007 case XC( OC_BINARY ):
3008 case XC( OC_REPLACE ): {
3009 double R_d = getvar_i(R.v);
3010 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
3011 switch (opn) {
3012 case '+':
3013 L_d += R_d;
3014 break;
3015 case '-':
3016 L_d -= R_d;
3017 break;
3018 case '*':
3019 L_d *= R_d;
3020 break;
3021 case '/':
3022 if (R_d == 0)
3023 syntax_error(EMSG_DIV_BY_ZERO);
3024 L_d /= R_d;
3025 break;
3026 case '&':
3027 if (ENABLE_FEATURE_AWK_LIBM)
3028 L_d = pow(L_d, R_d);
3029 else
3030 syntax_error(EMSG_NO_MATH);
3031 break;
3032 case '%':
3033 if (R_d == 0)
3034 syntax_error(EMSG_DIV_BY_ZERO);
3035 L_d -= (long long)(L_d / R_d) * R_d;
3036 break;
3037 }
3038 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3039 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3040 break;
3041 }
3042
3043 case XC( OC_COMPARE ): {
3044 int i = i;
3045 double Ld;
3046
3047 if (is_numeric(L.v) && is_numeric(R.v)) {
3048 Ld = getvar_i(L.v) - getvar_i(R.v);
3049 } else {
3050 const char *l = getvar_s(L.v);
3051 const char *r = getvar_s(R.v);
3052 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3053 }
3054 switch (opn & 0xfe) {
3055 case 0:
3056 i = (Ld > 0);
3057 break;
3058 case 2:
3059 i = (Ld >= 0);
3060 break;
3061 case 4:
3062 i = (Ld == 0);
3063 break;
3064 }
3065 setvar_i(res, (i == 0) ^ (opn & 1));
3066 break;
3067 }
3068
3069 default:
3070 syntax_error(EMSG_POSSIBLE_ERROR);
3071 }
3072 next:
3073 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3074 op = op->a.n;
3075 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3076 break;
3077 if (nextrec)
3078 break;
3079 }
3080
3081 nvfree(v1);
3082 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3083 return res;
3084#undef fnargs
3085#undef seed
3086#undef sreg
3087}
3088
3089
3090
3091
3092static int awk_exit(int r)
3093{
3094 var tv;
3095 unsigned i;
3096 hash_item *hi;
3097
3098 zero_out_var(&tv);
3099
3100 if (!exiting) {
3101 exiting = TRUE;
3102 nextrec = FALSE;
3103 evaluate(endseq.first, &tv);
3104 }
3105
3106
3107 for (i = 0; i < fdhash->csize; i++) {
3108 hi = fdhash->items[i];
3109 while (hi) {
3110 if (hi->data.rs.F && hi->data.rs.is_pipe)
3111 pclose(hi->data.rs.F);
3112 hi = hi->next;
3113 }
3114 }
3115
3116 exit(r);
3117}
3118
3119
3120
3121static int is_assignment(const char *expr)
3122{
3123 char *exprc, *val;
3124
3125 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3126 return FALSE;
3127 }
3128
3129 exprc = xstrdup(expr);
3130 val = exprc + (val - expr);
3131 *val++ = '\0';
3132
3133 unescape_string_in_place(val);
3134 setvar_u(newvar(exprc), val);
3135 free(exprc);
3136 return TRUE;
3137}
3138
3139
3140static rstream *next_input_file(void)
3141{
3142#define rsm (G.next_input_file__rsm)
3143#define files_happen (G.next_input_file__files_happen)
3144
3145 FILE *F;
3146 const char *fname, *ind;
3147
3148 if (rsm.F)
3149 fclose(rsm.F);
3150 rsm.F = NULL;
3151 rsm.pos = rsm.adv = 0;
3152
3153 for (;;) {
3154 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3155 if (files_happen)
3156 return NULL;
3157 fname = "-";
3158 F = stdin;
3159 break;
3160 }
3161 ind = getvar_s(incvar(intvar[ARGIND]));
3162 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3163 if (fname && *fname && !is_assignment(fname)) {
3164 F = xfopen_stdin(fname);
3165 break;
3166 }
3167 }
3168
3169 files_happen = TRUE;
3170 setvar_s(intvar[FILENAME], fname);
3171 rsm.F = F;
3172 return &rsm;
3173#undef rsm
3174#undef files_happen
3175}
3176
3177int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3178int awk_main(int argc UNUSED_PARAM, char **argv)
3179{
3180 unsigned opt;
3181 char *opt_F;
3182 llist_t *list_v = NULL;
3183 llist_t *list_f = NULL;
3184#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3185 llist_t *list_e = NULL;
3186#endif
3187 int i, j;
3188 var *v;
3189 var tv;
3190 char **envp;
3191 char *vnames = (char *)vNames;
3192 char *vvalues = (char *)vValues;
3193
3194 INIT_G();
3195
3196
3197
3198 if (ENABLE_LOCALE_SUPPORT)
3199 setlocale(LC_NUMERIC, "C");
3200
3201 zero_out_var(&tv);
3202
3203
3204 g_buf = xmalloc(MAXVARFMT + 1);
3205
3206 vhash = hash_init();
3207 ahash = hash_init();
3208 fdhash = hash_init();
3209 fnhash = hash_init();
3210
3211
3212 for (i = 0; *vnames; i++) {
3213 intvar[i] = v = newvar(nextword(&vnames));
3214 if (*vvalues != '\377')
3215 setvar_s(v, nextword(&vvalues));
3216 else
3217 setvar_i(v, 0);
3218
3219 if (*vnames == '*') {
3220 v->type |= VF_SPECIAL;
3221 vnames++;
3222 }
3223 }
3224
3225 handle_special(intvar[FS]);
3226 handle_special(intvar[RS]);
3227
3228 newfile("/dev/stdin")->F = stdin;
3229 newfile("/dev/stdout")->F = stdout;
3230 newfile("/dev/stderr")->F = stderr;
3231
3232
3233 if (environ) for (envp = environ; *envp; envp++) {
3234
3235 char *s = *envp;
3236 char *s1 = strchr(s, '=');
3237 if (s1) {
3238 *s1 = '\0';
3239
3240
3241 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3242 *s1 = '=';
3243 }
3244 }
3245 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3246 argv += optind;
3247
3248 if (opt & OPT_W)
3249 bb_error_msg("warning: option -W is ignored");
3250 if (opt & OPT_F) {
3251 unescape_string_in_place(opt_F);
3252 setvar_s(intvar[FS], opt_F);
3253 }
3254 while (list_v) {
3255 if (!is_assignment(llist_pop(&list_v)))
3256 bb_show_usage();
3257 }
3258 while (list_f) {
3259 char *s = NULL;
3260 FILE *from_file;
3261
3262 g_progname = llist_pop(&list_f);
3263 from_file = xfopen_stdin(g_progname);
3264
3265 for (i = j = 1; j > 0; i += j) {
3266 s = xrealloc(s, i + 4096);
3267 j = fread(s + i, 1, 4094, from_file);
3268 }
3269 s[i] = '\0';
3270 fclose(from_file);
3271 parse_program(s + 1);
3272 free(s);
3273 }
3274 g_progname = "cmd. line";
3275#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3276 while (list_e) {
3277 parse_program(llist_pop(&list_e));
3278 }
3279#endif
3280 if (!(opt & (OPT_f | OPT_e))) {
3281 if (!*argv)
3282 bb_show_usage();
3283 parse_program(*argv++);
3284 }
3285
3286
3287 setari_u(intvar[ARGV], 0, "awk");
3288 i = 0;
3289 while (*argv)
3290 setari_u(intvar[ARGV], ++i, *argv++);
3291 setvar_i(intvar[ARGC], i + 1);
3292
3293 evaluate(beginseq.first, &tv);
3294 if (!mainseq.first && !endseq.first)
3295 awk_exit(EXIT_SUCCESS);
3296
3297
3298 if (!iF)
3299 iF = next_input_file();
3300
3301
3302 while (iF) {
3303 nextfile = FALSE;
3304 setvar_i(intvar[FNR], 0);
3305
3306 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3307 nextrec = FALSE;
3308 incvar(intvar[NR]);
3309 incvar(intvar[FNR]);
3310 evaluate(mainseq.first, &tv);
3311
3312 if (nextfile)
3313 break;
3314 }
3315
3316 if (i < 0)
3317 syntax_error(strerror(errno));
3318
3319 iF = next_input_file();
3320 }
3321
3322 awk_exit(EXIT_SUCCESS);
3323
3324}
3325