1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48#include "libbb.h"
49#include "xregex.h"
50#include <math.h>
51
52
53
54
55
56
/* Debug tracing hooks. All three are defined here as no-op statements, so
 * none of the stderr-printing fallbacks below is selected. Removing one of
 * these three lines enables the matching fprintf(stderr, ...) variant. */
57#define debug_printf_walker(...) do {} while (0)
58#define debug_printf_eval(...) do {} while (0)
59#define debug_printf_parse(...) do {} while (0)
60
61#ifndef debug_printf_walker
62# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
63#endif
64#ifndef debug_printf_eval
65# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
66#endif
67#ifndef debug_printf_parse
68# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
69#else
/* Parse tracing is disabled: the token-class dumper becomes an empty
 * expression, and the real function further down is not compiled. */
70# define debug_parse_print_tc(...) ((void)0)
71#endif
72
73
74
75
76
77
/* Command-line option string and the matching bit numbers/masks.
 * The OPTBIT_* order follows the order of the letters in OPTSTR_AWK
 * (F, v, f, optional e, W).
 * NOTE(review): the code consuming OPTSTR_AWK is not visible here; the
 * meaning of the "+", ":" and "*" markers should be confirmed against the
 * option parser. IF_FEATURE_AWK_GNU_EXTENSIONS() is defined elsewhere and
 * presumably expands to its argument only when that feature is enabled,
 * in which case OPT_e evaluates to 0 when it is disabled. */
78#define OPTSTR_AWK "+" \
79 "F:v:*f:*" \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
81 "W:"
82enum {
83 OPTBIT_F,
84 OPTBIT_v,
85 OPTBIT_f,
86 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,)
87 OPTBIT_W,
88 OPT_F = 1 << OPTBIT_F,
89 OPT_v = 1 << OPTBIT_v,
90 OPT_f = 1 << OPTBIT_f,
91 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
92 OPT_W = 1 << OPTBIT_W
93};
94
/* Size limit passed to snprintf() when formatting a number into g_buf
 * (g_buf itself is MAXVARFMT + 1 bytes). */
95#define MAXVARFMT 240
96
97
/* Bits of var.type. */
/* VF_NUMBER: the variable holds a number (set by setvar_i). */
98#define VF_NUMBER 0x0001
/* VF_ARRAY: x.array is a valid hash (set by iamarray). */
99#define VF_ARRAY 0x0002
100
/* VF_CACHED: the other representation (string for a number, number for a
 * string) has already been computed by getvar_s/getvar_i. */
101#define VF_CACHED 0x0100
/* VF_USER: value came through setvar_u; getvar_i clears the bit when the
 * string does not parse completely as a number. */
102#define VF_USER 0x0200
/* VF_SPECIAL, VF_WALK: not used in the visible part of the file.
 * NOTE(review): presumably "has side effects in handle_special" and
 * "x.walker is active" respectively - confirm. */
103#define VF_SPECIAL 0x0400
104#define VF_WALK 0x0800
/* VF_FSTR: string is not owned by the variable; clrvar does not free it. */
105#define VF_FSTR 0x1000
/* VF_CHILD: x.parent points at the real variable (followed by iamarray). */
106#define VF_CHILD 0x2000
/* VF_DIRTY: set by clrvar, i.e. on every assignment. */
107#define VF_DIRTY 0x4000
108
109
/* Flags that clrvar preserves when a variable is reassigned. */
110#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
111
/* Linked buffer node: a trailing byte array (wbuf) addressed through the
 * cur/end pointers, plus a link to a previous node.
 * NOTE(review): presumably holds the key list for an array walk (see
 * var.x.walker); the code that fills and consumes it is not visible here. */
112typedef struct walker_list {
113 char *end;
114 char *cur;
115 struct walker_list *prev;
116 char wbuf[1];
117} walker_list;
118
119
/* An awk variable (also used for constants and array elements). */
120typedef struct var_s {
 /* combination of VF_* flags */
121 unsigned type;
 /* string value, or NULL for the empty string (see getvar_s) */
122 char *string;
 /* numeric value, valid when VF_NUMBER or VF_CACHED is set (see getvar_i) */
123 double number;
124 union {
 /* argument index; read by parse_expr for names found in ahash */
125 int aidx;
 /* element hash, valid when VF_ARRAY is set */
126 struct xhash_s *array;
 /* real variable, valid when VF_CHILD is set */
127 struct var_s *parent;
 /* NOTE(review): presumably valid while VF_WALK is set - confirm */
128 walker_list *walker;
129 } x;
130} var;
131
132
/* A sequence of nodes identified by its first and last element.
 * NOTE(review): programname is presumably the source name the sequence was
 * parsed from; the code maintaining chains is not visible here. */
133typedef struct chain_s {
134 struct node_s *first;
135 struct node_s *last;
136 const char *programname;
137} chain;
138
139
/* A user function entry stored in fnhash (created through newfunc).
 * NOTE(review): nargs/defined are presumably the declared argument count
 * and a "body has been parsed" flag; the code setting them is not visible. */
140typedef struct func_s {
141 unsigned nargs;
142 smallint defined;
143 struct chain_s body;
144} func;
145
146
/* State of one input/output stream, stored in fdhash (see newfile).
 * NOTE(review): buffer/adv/size/pos presumably describe the record read
 * buffer; the reader code is not visible here - confirm before relying
 * on the exact meaning of each field. */
147typedef struct rstream_s {
148 FILE *F;
149 char *buffer;
150 int adv;
151 int size;
152 int pos;
153 smallint is_pipe;
154} rstream;
155
/* One entry of an xhash bucket chain. */
156typedef struct hash_item_s {
 /* Payload. It must stay the first member: hash_find stores the
  * `&hi->data` pointer returned by hash_search3 into a hash_item pointer,
  * which is only correct while both addresses coincide. */
157 union {
158 struct var_s v;
159 struct rstream_s rs;
160 struct func_s f;
161 } data;
 /* next entry in the same bucket */
162 struct hash_item_s *next;
 /* key; hash_find allocates strlen(name) + 1 extra bytes for it */
163 char name[1];
164} hash_item;
165
/* Chained hash table keyed by NUL-terminated names. */
166typedef struct xhash_s {
 /* number of stored entries */
167 unsigned nel;
 /* number of buckets in items[] */
168 unsigned csize;
 /* index into PRIMES[] of the size to use at the next hash_rebuild */
169 unsigned nprime;
 /* sum of (strlen(name) + 1) over all entries */
170 unsigned glen;
 /* bucket array */
171 struct hash_item_s **items;
172} xhash;
173
174
/* Parse tree node. */
175typedef struct node_s {
 /* opcode class, flags and priority bits (OC_*, OF_*, P()) */
176 uint32_t info;
 /* value of g_lineno when the node was created (see new_node) */
177 unsigned lineno;
 /* left operand; which member is valid depends on info */
178 union {
179 struct node_s *n;
180 var *v;
181 int aidx;
182 const char *new_progname;
183 regex_t *re;
184 } l;
 /* right operand; mk_re_node stores the case-insensitive regex in ire */
185 union {
186 struct node_s *n;
187 regex_t *ire;
188 func *f;
189 } r;
 /* link set by parse_expr to the node this one was attached under */
190 union {
191 struct node_s *n;
192 } a;
193} node;
194
/* A node bundled with storage for two compiled regexes, matching what
 * mk_re_node expects (re[0] case-sensitive, re[1] case-insensitive).
 * NOTE(review): used for fsplitter/rsplitter, presumably the field and
 * record separators - the splitting code is not visible here. */
195typedef struct tsplitter_s {
196 node n;
197 regex_t re[2];
198} tsplitter;
199
200
201
/* Token classes, one bit each.
 * Bits 0..24 (TC_LPAREN..TC_END) must stay in the same order as the
 * NTC-separated groups of tokenlist: next_token starts with class bit 1
 * and shifts it left each time it crosses an NTC separator. */
202#define TC_LPAREN (1 << 0)
203#define TC_RPAREN (1 << 1)
204#define TC_REGEXP (1 << 2)
205#define TC_OUTRDR (1 << 3)
206#define TC_UOPPOST (1 << 4)
207#define TC_UOPPRE1 (1 << 5)
208#define TC_BINOPX (1 << 6)
209#define TC_IN (1 << 7)
210#define TC_COMMA (1 << 8)
211#define TC_PIPE (1 << 9)
212#define TC_UOPPRE2 (1 << 10)
213#define TC_ARRTERM (1 << 11)
214#define TC_LBRACE (1 << 12)
215#define TC_RBRACE (1 << 13)
216#define TC_SEMICOL (1 << 14)
217#define TC_NEWLINE (1 << 15)
218#define TC_STATX (1 << 16)
219#define TC_WHILE (1 << 17)
220#define TC_ELSE (1 << 18)
221#define TC_BUILTIN (1 << 19)
222
223
224
225
226
227#define TC_LENGTH (1 << 20)
228#define TC_GETLINE (1 << 21)
229#define TC_FUNCDECL (1 << 22)
230#define TC_BEGIN (1 << 23)
231#define TC_END (1 << 24)
/* The classes below have no tokenlist group; next_token assigns them
 * directly after recognising end of input, an identifier, a string
 * literal or a number. */
232#define TC_EOF (1 << 25)
233#define TC_VARIABLE (1 << 26)
234#define TC_ARRAY (1 << 27)
235#define TC_FUNCTION (1 << 28)
236#define TC_STRING (1 << 29)
237#define TC_NUMBER (1 << 30)
238
239#ifndef debug_parse_print_tc
240static void debug_parse_print_tc(uint32_t n)
241{
242 if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
243 if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
244 if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
245 if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
246 if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
247 if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
248 if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
249 if (n & TC_IN ) debug_printf_parse(" IN" );
250 if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
251 if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
252 if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
253 if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
254 if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
255 if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
256 if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
257 if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
258 if (n & TC_STATX ) debug_printf_parse(" STATX" );
259 if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
260 if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
261 if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
262 if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
263 if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
264 if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
265 if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
266 if (n & TC_END ) debug_printf_parse(" END" );
267 if (n & TC_EOF ) debug_printf_parse(" EOF" );
268 if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
269 if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
270 if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
271 if (n & TC_STRING ) debug_printf_parse(" STRING" );
272 if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
273}
274#endif
275
276
/* Token sets: unions of TC_* classes used by the tokenizer and parser. */

/* any prefix unary operator */
277#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
278
/* any binary operator */
279#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
280
/* tokens that can start an operand */
281#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
282 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
283 | TC_LPAREN | TC_STRING | TC_NUMBER)
284
285#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
286#define TS_STATEMNT (TC_STATX | TC_WHILE)
287
288
/* Keyword-like tokens: next_token always tries these classes, and rejects
 * a match that is immediately followed by an alphanumeric or '_'. */
289#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
290 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
291 | TC_FUNCDECL | TC_BEGIN | TC_END)
292
293
/* Tokens after which next_token silently skips a newline. */
294#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
295 | TC_SEMICOL | TC_NEWLINE)
296
297
/* tokens that can start an expression */
298#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
299
/* NOTE(review): presumably tokens that can start a statement group; the
 * statement parser is not visible here. */
300#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
301 | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
302
303
304
/* next_token inserts an implicit concatenation operator when the previous
 * token is in TS_CONCAT_L, the new one is in TS_CONCAT_R and a binary
 * operator is acceptable (except for `length` followed by '('). */
305#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
306 | TC_STRING | TC_NUMBER | TC_UOPPOST \
307 | TC_LENGTH)
308#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
309
/* Operand-handling flags stored in node.info / tokeninfo entries.
 * NOTE(review): the evaluator that interprets them is not visible here.
 * Judging by the names, RES1/RES2 presumably request evaluation of the
 * left/right operand, STR1/STR2 its string value, NUM1 its numeric value,
 * and REQUIRED marks a mandatory operand - confirm against evaluate(). */
310#define OF_RES1 0x010000
311#define OF_RES2 0x020000
312#define OF_STR1 0x040000
313#define OF_STR2 0x080000
314#define OF_NUM1 0x100000
315#define OF_CHECKED 0x200000
316#define OF_REQUIRED 0x400000
317
318
/* Two-letter shorthands for OF_* combinations, used in the tokeninfo
 * table. The first letter refers to the *1 flags, the second to the *2
 * flags; x means none of them are set for that side. */
319#define xx 0
320#define xV OF_RES2
321#define xS (OF_RES2 | OF_STR2)
322#define Vx OF_RES1
323#define Rx OF_REQUIRED
324#define VV (OF_RES1 | OF_RES2)
325#define Nx (OF_RES1 | OF_NUM1)
326#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
327#define Sx (OF_RES1 | OF_STR1)
328#define SV (OF_RES1 | OF_STR1 | OF_RES2)
329#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
330
/* Bit fields of node.info: OPCLSMASK selects the OC_/ST_ opcode class,
 * OPNMASK the low seven bits. */
331#define OPCLSMASK 0xFF00
332#define OPNMASK 0x007F
333
334
335
336
/* P(x) places a value in bits 24 and up of info. parse_expr compares
 * these bits (through PRIMASK / PRIMASK2) to decide where a new operator
 * node is attached, i.e. they act as operator priority. */
337#undef P
338#undef PRIMASK
339#undef PRIMASK2
340#define P(x) (x << 24)
341#define PRIMASK 0x7F000000
342#define PRIMASK2 0x7E000000
343
344
/* NOTE(review): not used in the visible part of the file; presumably
 * opcode-class thresholds used by the evaluator - confirm. */
345#define SHIFT_TIL_THIS 0x0600
346#define RECUR_FROM_THIS 0x1000
/* Opcode classes. Each value occupies the bits selected by OPCLSMASK
 * (0xFF00) of node.info. ST_* values are statement keywords. */
347enum {
348 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
349 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
350
351 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
352 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
353 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
354
355 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
356 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
357 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
358 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
359 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
360 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
361 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
362 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
363 OC_DONE = 0x2800,
364
365 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
366 ST_WHILE = 0x3300
367};
368
369
/* Sub-opcodes combined with OC_FBLTIN in the tokeninfo table. */
370enum {
371 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
372 F_ti, F_le, F_sy, F_ff, F_cl
373};
374
375
/* Sub-opcodes combined with OC_BUILTIN in the tokeninfo table. */
376enum {
377 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
378 B_ge, B_gs, B_su,
379 B_an, B_co, B_ls, B_or, B_rs, B_xo,
380};
381
382
383
/* Separator between token classes inside tokenlist (string and char form). */
384#define NTC "\377"
385#define NTCC '\377'
386
/* Table of fixed tokens, scanned linearly by next_token.
 * Each token is stored as one length byte (written as an octal escape)
 * followed by that many characters. An NTC byte ends a class: the groups
 * appear in the same order as the TC_* bits, starting with TC_LPAREN.
 * Within a group the first matching entry wins, so a longer token must be
 * listed before any shorter token that is its prefix (e.g. ">>" before ">").
 * Every token here has exactly one corresponding entry in tokeninfo. */
387static const char tokenlist[] ALIGN1 =
388 "\1(" NTC
389 "\1)" NTC
390 "\1/" NTC
391 "\2>>" "\1>" "\1|" NTC
392 "\2++" "\2--" NTC
393 "\2++" "\2--" "\1$" NTC
394 "\2==" "\1=" "\2+=" "\2-="
395 "\2*=" "\2/=" "\2%=" "\2^="
396 "\1+" "\1-" "\3**=" "\2**"
397 "\1/" "\1%" "\1^" "\1*"
398 "\2!=" "\2>=" "\2<=" "\1>"
399 "\1<" "\2!~" "\1~" "\2&&"
400 "\2||" "\1?" "\1:" NTC
401 "\2in" NTC
402 "\1," NTC
403 "\1|" NTC
404 "\1+" "\1-" "\1!" NTC
405 "\1]" NTC
406 "\1{" NTC
407 "\1}" NTC
408 "\1;" NTC
409 "\1\n" NTC
410 "\2if" "\2do" "\3for" "\5break"
411 "\10continue" "\6delete" "\5print"
412 "\6printf" "\4next" "\10nextfile"
413 "\6return" "\4exit" NTC
414 "\5while" NTC
415 "\4else" NTC
416 "\3and" "\5compl" "\6lshift" "\2or"
417 "\6rshift" "\3xor"
418 "\5close" "\6system" "\6fflush" "\5atan2"
419 "\3cos" "\3exp" "\3int" "\3log"
420 "\4rand" "\3sin" "\4sqrt" "\5srand"
421 "\6gensub" "\4gsub" "\5index"
422 "\5match" "\5split" "\7sprintf" "\3sub"
423 "\6substr" "\7systime" "\10strftime" "\6mktime"
424 "\7tolower" "\7toupper" NTC
425 "\6length" NTC
426 "\7getline" NTC
427 "\4func" "\10function" NTC
428 "\5BEGIN" NTC
429 "\3END"
430
431 ;
432
/* Per-token info words, parallel to tokenlist: next_token advances one
 * entry here for every token it steps over there, and copies the matching
 * entry to t_info. Entries combine an OC_*/ST_* opcode class, OF_* operand
 * flags (via the two-letter shorthands), a P() priority and a low-byte
 * sub-code. The TI_* macros name entries that the parser compares against.
 * NOTE(review): the meaning of the character sub-codes ('a', 'w', 'p', ...)
 * and of the numeric COMPARE sub-codes is defined by the evaluator, which
 * is not visible here. */
433static const uint32_t tokeninfo[] ALIGN4 = {
434 0,
435 0,
436#define TI_REGEXP OC_REGEXP
437 TI_REGEXP,
438 xS|'a', xS|'w', xS|'|',
439 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
440#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
441#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
442 TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
443 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
444 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
445 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
446 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
447 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
448#define TI_LESS (OC_COMPARE|VV|P(39)|2)
449 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
450#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
451#define TI_COLON (OC_COLON|xx|P(67)|':')
452 OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
453#define TI_IN (OC_IN|SV|P(49))
454 TI_IN,
455#define TI_COMMA (OC_COMMA|SS|P(80))
456 TI_COMMA,
457#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
458 TI_PGETLINE,
459 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
460 0,
461 0,
462 0,
463 0,
464 0,
465 ST_IF, ST_DO, ST_FOR, OC_BREAK,
466 OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
467 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
468 OC_RETURN|Vx, OC_EXIT|Nx,
469 ST_WHILE,
470 0,
471
472
473
474
475
476
477
/* Local shorthands for the builtin-function entries below.
 * NOTE(review): for OC_BUILTIN entries the P() bits carry A1/A2/A3 and the
 * __v.._vv_ patterns instead of a priority; presumably a minimum argument
 * count and per-argument evaluation modes - confirm against the builtin
 * evaluator. */
478#define OC_B OC_BUILTIN
479#define OC_F OC_FBLTIN
480#define A1 P(0x40)
481#define A2 P(0x80)
482#define A3 P(0xc0)
483#define __v P(1)
484#define _vv P(3)
485#define __s__v P(9)
486#define __s_vv P(0x0b)
487#define __svvv P(0x0f)
488#define _ss_vv P(0x1b)
489#define _s_vv_ P(0x16)
490#define ss_vv_ P(0x36)
491 OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2,
492 OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2,
493 OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2,
494 OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx,
495 OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx,
496 OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2,
497 OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,
498 OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv,
499 OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1,
500 OC_F|F_le|Sx,
501 OC_GETLINE|SV,
502 0, 0,
503 0,
504 0
505#undef A1
506#undef A2
507#undef A3
508#undef OC_B
509#undef OC_F
510};
511
512
513
/* Indices into intvar[] for the built-in awk variables. The order matches
 * the names in vNames below (F0 corresponds to the name "$"). */
514enum {
515 CONVFMT, OFMT, FS, OFS,
516 ORS, RS, RT, FILENAME,
517 SUBSEP, F0, ARGIND, ARGC,
518 ARGV, ERRNO, FNR, NR,
519 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
520};
521
/* Names of the built-in variables as consecutive NUL-terminated strings,
 * ended by an empty string.
 * NOTE(review): a '*' placed right after a name's terminator presumably
 * tells the initialisation code to flag that variable as VF_SPECIAL; the
 * consumer of this table is not visible here. */
522static const char vNames[] ALIGN1 =
523 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
524 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
525 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
526 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
527 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
528
/* Initial string values for the first built-in variables, in vNames order.
 * NOTE(review): the trailing "\377" presumably marks the point from which
 * the remaining variables are initialised numerically - confirm. */
529static const char vValues[] ALIGN1 =
530 "%.6g\0" "%.6g\0" " \0" " \0"
531 "\n\0" "\n\0" "\0" "\0"
532 "\034\0" "\0" "\377";
533
534
/* Hash table sizes: a new table has FIRST_PRIME buckets, and each
 * hash_rebuild moves on to the next entry of PRIMES. */
535#define FIRST_PRIME 61
536static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
537
538
539
540
541
542
543
/* Global state, first part. It is allocated immediately in front of
 * struct globals2 (see INIT_G) and reached through the G1 macro. */
544struct globals {
 /* numeric value of the last TC_NUMBER token (set by next_token) */
545 double t_double;
546 chain beginseq, mainseq, endseq;
547 chain *seq;
548 node *break_ptr, *continue_ptr;
549 rstream *iF;
 /* ahash is also used as fdhash (see the fdhash macro); parse_expr looks
  * names up in ahash to recognise function arguments */
550 xhash *ahash;
551 xhash *fnhash;
552 xhash *vhash;
553
554
 /* program name and line number reported by syntax_error */
555 const char *g_progname;
556 int g_lineno;
557 int nfields;
558 int maxfields;
559 var *Fields;
 /* tokenizer position in the program text */
560 char *g_pos;
 /* character overwritten by the NUL that terminates a variable name;
  * restored by the next call to next_token */
561 char g_saved_ch;
562 smallint icase;
563 smallint exiting;
564 smallint nextrec;
565 smallint nextfile;
566 smallint is_f0_split;
 /* set by rollback_token: next_token must return the current token again */
567 smallint t_rollback;
568
569
 /* function-private state of next_token (implicit concatenation) */
570 smallint next_token__concat_inserted;
571 uint32_t next_token__save_tclass;
572 uint32_t next_token__save_info;
573};
/* Global state, second part, reached through the G macro. Members named
 * func__name are private to the function named in the prefix. */
574struct globals2 {
 /* current token: info word, class, text and line (see next_token) */
575 uint32_t t_info;
576 uint32_t t_tclass;
577 char *t_string;
578 int t_lineno;
579
 /* built-in variables, indexed by CONVFMT..ENVIRON */
580 var *intvar[NUM_INTERNAL_VARS];
581
582
583 char *split_f0__fstrings;
584
585 rstream next_input_file__rsm;
586 smallint next_input_file__files_happen;
587
588 smalluint exitcode;
589
590 unsigned evaluate__seed;
591 var *evaluate__fnargs;
592 regex_t evaluate__sreg;
593
594 var ptest__tmpvar;
595 var awk_printf__tmpvar;
596 var as_regex__tmpvar;
597 var exit__tmpvar;
598 var main__tmpvar;
599
600 tsplitter exec_builtin__tspl;
601
602
603 tsplitter fsplitter, rsplitter;
604
 /* scratch buffer filled by fmt_num */
605 char g_buf[MAXVARFMT + 1];
606};
/* ptr_to_globals points at struct globals2; struct globals sits directly
 * before it in the same allocation, hence the [-1] index. */
607#define G1 (ptr_to_globals[-1])
608#define G (*(struct globals2 *)ptr_to_globals)
609
610
611
612
613
/* Short names for the global fields. Because these are object-like
 * macros, none of these identifiers can be reused as a local name. */
614#define t_double (G1.t_double )
615#define beginseq (G1.beginseq )
616#define mainseq (G1.mainseq )
617#define endseq (G1.endseq )
618#define seq (G1.seq )
619#define break_ptr (G1.break_ptr )
620#define continue_ptr (G1.continue_ptr)
621#define iF (G1.iF )
622#define ahash (G1.ahash )
623#define fnhash (G1.fnhash )
624#define vhash (G1.vhash )
/* fdhash shares its storage with ahash */
625#define fdhash ahash
626
627
628
629#define g_progname (G1.g_progname )
630#define g_lineno (G1.g_lineno )
631#define nfields (G1.nfields )
632#define maxfields (G1.maxfields )
633#define Fields (G1.Fields )
634#define g_pos (G1.g_pos )
635#define g_saved_ch (G1.g_saved_ch )
636#define icase (G1.icase )
637#define exiting (G1.exiting )
638#define nextrec (G1.nextrec )
639#define nextfile (G1.nextfile )
640#define is_f0_split (G1.is_f0_split )
641#define t_rollback (G1.t_rollback )
642#define t_info (G.t_info )
643#define t_tclass (G.t_tclass )
644#define t_string (G.t_string )
645#define t_lineno (G.t_lineno )
646#define intvar (G.intvar )
647#define fsplitter (G.fsplitter )
648#define rsplitter (G.rsplitter )
649#define g_buf (G.g_buf )
/* Allocate both global structs zero-filled in one block, point
 * ptr_to_globals at the second one, and set the non-zero defaults: the
 * "previous token" starts out as a newline and the seed as 1. */
650#define INIT_G() do { \
651 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
652 t_tclass = TC_NEWLINE; \
653 G.evaluate__seed = 1; \
654} while (0)
655
/* Error message texts, passed to syntax_error() by the visible code. */
656static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
657static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
658static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
659static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
660static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
661static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
662static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
663static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
664static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
665static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
666
/* Forward declaration; the definition is outside the visible source. */
667static int awk_exit(void) NORETURN;
668
669static void syntax_error(const char *message) NORETURN;
670static void syntax_error(const char *message)
671{
672 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
673}
674
675
676
/* Compute the unreduced hash of a NUL-terminated name.
 * For every character the running value is multiplied by 63 (written as
 * a shift and a subtraction) and the character is added, with unsigned
 * wrap-around. Callers reduce the result modulo the bucket count. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = (h << 6) - h + *p;
	return h;
}
685
686
687static xhash *hash_init(void)
688{
689 xhash *newhash;
690
691 newhash = xzalloc(sizeof(*newhash));
692 newhash->csize = FIRST_PRIME;
693 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
694
695 return newhash;
696}
697
698static void hash_clear(xhash *hash)
699{
700 unsigned i;
701 hash_item *hi, *thi;
702
703 for (i = 0; i < hash->csize; i++) {
704 hi = hash->items[i];
705 while (hi) {
706 thi = hi;
707 hi = hi->next;
708
709 free(thi->data.v.string);
710 free(thi);
711 }
712 hash->items[i] = NULL;
713 }
714 hash->glen = hash->nel = 0;
715}
716
/* Disabled code (compiled out by "#if 0"): would empty the table and then
 * release the bucket array and the table itself. */
717#if 0
718static void hash_free(xhash *hash)
719{
720 hash_clear(hash);
721 free(hash->items);
722 free(hash);
723}
724#endif
725
726
727static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
728{
729 hash_item *hi;
730
731 hi = hash->items[idx % hash->csize];
732 while (hi) {
733 if (strcmp(hi->name, name) == 0)
734 return &hi->data;
735 hi = hi->next;
736 }
737 return NULL;
738}
739
740static void *hash_search(xhash *hash, const char *name)
741{
742 return hash_search3(hash, name, hashidx(name));
743}
744
745
746static void hash_rebuild(xhash *hash)
747{
748 unsigned newsize, i, idx;
749 hash_item **newitems, *hi, *thi;
750
751 if (hash->nprime == ARRAY_SIZE(PRIMES))
752 return;
753
754 newsize = PRIMES[hash->nprime++];
755 newitems = xzalloc(newsize * sizeof(newitems[0]));
756
757 for (i = 0; i < hash->csize; i++) {
758 hi = hash->items[i];
759 while (hi) {
760 thi = hi;
761 hi = thi->next;
762 idx = hashidx(thi->name) % newsize;
763 thi->next = newitems[idx];
764 newitems[idx] = thi;
765 }
766 }
767
768 free(hash->items);
769 hash->csize = newsize;
770 hash->items = newitems;
771}
772
773
774static void *hash_find(xhash *hash, const char *name)
775{
776 hash_item *hi;
777 unsigned idx;
778 int l;
779
780 idx = hashidx(name);
781 hi = hash_search3(hash, name, idx);
782 if (!hi) {
783 if (++hash->nel > hash->csize * 8)
784 hash_rebuild(hash);
785
786 l = strlen(name) + 1;
787 hi = xzalloc(sizeof(*hi) + l);
788 strcpy(hi->name, name);
789
790 idx = idx % hash->csize;
791 hi->next = hash->items[idx];
792 hash->items[idx] = hi;
793 hash->glen += l;
794 }
795 return &hi->data;
796}
797
/* Typed find-or-create wrappers around hash_find: a variable in a given
 * hash, a variable in vhash, a stream in fdhash, a function in fnhash. */
798#define findvar(hash, name) ((var*) hash_find((hash), (name)))
799#define newvar(name) ((var*) hash_find(vhash, (name)))
800#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
801#define newfunc(name) ((func*) hash_find(fnhash, (name)))
802
803static void hash_remove(xhash *hash, const char *name)
804{
805 hash_item *hi, **phi;
806
807 phi = &hash->items[hashidx(name) % hash->csize];
808 while (*phi) {
809 hi = *phi;
810 if (strcmp(hi->name, name) == 0) {
811 hash->glen -= (strlen(name) + 1);
812 hash->nel--;
813 *phi = hi->next;
814 free(hi);
815 break;
816 }
817 phi = &hi->next;
818 }
819}
820
821
822
823static char *skip_spaces(char *p)
824{
825 for (;;) {
826 if (*p == '\\' && p[1] == '\n') {
827 p++;
828 t_lineno++;
829 } else if (*p != ' ' && *p != '\t') {
830 break;
831 }
832 p++;
833 }
834 return p;
835}
836
837
/* Return the NUL-terminated word *s points at and advance *s to the
 * character just past that word's terminator. Used to step through
 * tables of consecutive NUL-terminated strings. */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
847
/* Read one (possibly escaped) character from *s and advance *s past it.
 *
 * A backslash is handed to bb_process_escape_sequence(). If that yields a
 * backslash without consuming any input, the sequence was not recognised:
 * the character following the backslash is returned as-is and consumed,
 * unless it is the terminating NUL, which is returned but not consumed. */
static char nextchar(char **s)
{
	char ch = **s;
	char *after_first;

	++*s;
	after_first = *s;
	if (ch == '\\')
		ch = bb_process_escape_sequence((const char**)s);

	if (ch == '\\' && *s == after_first) {
		ch = **s;
		if (ch != '\0')
			++*s;
	}
	return ch;
}
867
868
869
/* Replace escape sequences in s1 by the characters they denote, writing
 * the result over the original string. Decoding goes through nextchar(),
 * and the terminating NUL is copied as well. */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char *dst = s1;

	for (;;) {
		char ch = nextchar(&src);

		*dst = ch;
		if (ch == '\0')
			break;
		dst++;
	}
}
876
877static ALWAYS_INLINE int isalnum_(int c)
878{
879 return (isalnum(c) || c == '_');
880}
881
882static double my_strtod(char **pp)
883{
884 char *cp = *pp;
885 if (ENABLE_DESKTOP && cp[0] == '0') {
886
887 char c = (cp[1] | 0x20);
888 if (c == 'x' || isdigit(cp[1])) {
889 unsigned long long ull = strtoull(cp, pp, 0);
890 if (c == 'x')
891 return ull;
892 c = **pp;
893 if (!isdigit(c) && c != '.')
894 return ull;
895
896
897
898
899
900 }
901 }
902 return strtod(cp, pp);
903}
904
905
906
907static void fmt_num(const char *format, double n)
908{
909 if (n == (long long)n) {
910 snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
911 } else {
912 const char *s = format;
913 char c;
914
915 do { c = *s; } while (c && *++s);
916 if (strchr("diouxX", c)) {
917 snprintf(g_buf, MAXVARFMT, format, (int)n);
918 } else if (strchr("eEfFgGaA", c)) {
919 snprintf(g_buf, MAXVARFMT, format, n);
920 } else {
921 syntax_error(EMSG_INV_FMT);
922 }
923 }
924}
925
926static xhash *iamarray(var *a)
927{
928 while (a->type & VF_CHILD)
929 a = a->x.parent;
930
931 if (!(a->type & VF_ARRAY)) {
932 a->type |= VF_ARRAY;
933 a->x.array = hash_init();
934 }
935 return a->x.array;
936}
937
/* Emptying an array is the same as clearing its element hash. */
938#define clear_array(array) hash_clear(array)
939
940
941static var *clrvar(var *v)
942{
943 if (!(v->type & VF_FSTR))
944 free(v->string);
945
946 v->type &= VF_DONTTOUCH;
947 v->type |= VF_DIRTY;
948 v->string = NULL;
949 return v;
950}
951
952static void handle_special(var *);
953
954
955static var *setvar_p(var *v, char *value)
956{
957 clrvar(v);
958 v->string = value;
959 handle_special(v);
960 return v;
961}
962
963
964static var *setvar_s(var *v, const char *value)
965{
966 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
967}
968
969
970static var *setvar_u(var *v, const char *value)
971{
972 v = setvar_s(v, value);
973 v->type |= VF_USER;
974 return v;
975}
976
977
978static void setari_u(var *a, int idx, const char *s)
979{
980 var *v;
981
982 v = findvar(iamarray(a), itoa(idx));
983 setvar_u(v, s);
984}
985
986
987static var *setvar_i(var *v, double value)
988{
989 clrvar(v);
990 v->type |= VF_NUMBER;
991 v->number = value;
992 handle_special(v);
993 return v;
994}
995
996static const char *getvar_s(var *v)
997{
998
999 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
1000 fmt_num(getvar_s(intvar[CONVFMT]), v->number);
1001 v->string = xstrdup(g_buf);
1002 v->type |= VF_CACHED;
1003 }
1004 return (v->string == NULL) ? "" : v->string;
1005}
1006
1007static double getvar_i(var *v)
1008{
1009 char *s;
1010
1011 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
1012 v->number = 0;
1013 s = v->string;
1014 if (s && *s) {
1015 debug_printf_eval("getvar_i: '%s'->", s);
1016 v->number = my_strtod(&s);
1017 debug_printf_eval("%f (s:'%s')\n", v->number, s);
1018 if (v->type & VF_USER) {
1019
1020 s = skip_spaces(s);
1021 if (*s != '\0')
1022 v->type &= ~VF_USER;
1023 }
1024 } else {
1025 debug_printf_eval("getvar_i: '%s'->zero\n", s);
1026 v->type &= ~VF_USER;
1027 }
1028 v->type |= VF_CACHED;
1029 }
1030 debug_printf_eval("getvar_i: %f\n", v->number);
1031 return v->number;
1032}
1033
1034
1035static unsigned long getvar_i_int(var *v)
1036{
1037 double d = getvar_i(v);
1038
1039
1040
1041 if (d >= 0)
1042 return (unsigned long)d;
1043
1044 return - (long) (unsigned long) (-d);
1045}
1046
1047static var *copyvar(var *dest, const var *src)
1048{
1049 if (dest != src) {
1050 clrvar(dest);
1051 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
1052 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
1053 dest->number = src->number;
1054 if (src->string)
1055 dest->string = xstrdup(src->string);
1056 }
1057 handle_special(dest);
1058 return dest;
1059}
1060
1061static var *incvar(var *v)
1062{
1063 return setvar_i(v, getvar_i(v) + 1.0);
1064}
1065
1066
1067static int is_numeric(var *v)
1068{
1069 getvar_i(v);
1070 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
1071}
1072
1073
1074static int istrue(var *v)
1075{
1076 if (is_numeric(v))
1077 return (v->number != 0);
1078 return (v->string && v->string[0]);
1079}
1080
1081
1082
1083
1084
1085
1086
/* Read the next token from the program text at g_pos.
 *
 * expected: set of TC_* classes acceptable at this point. It also steers
 *           the scan: '/' starts a regex only if TC_REGEXP is expected, and
 *           operator tokens are only tried for expected classes.
 * Returns the class of the token (also stored in t_tclass). The token's
 * info word is left in t_info, its text in t_string (identifiers, strings,
 * regexes) or its value in t_double (numbers).
 * A token outside `expected` is a fatal syntax error.
 *
 * The program text is modified in place: string and regex literals are
 * unescaped over themselves and identifiers are NUL-terminated. */
1087static uint32_t next_token(uint32_t expected)
1088{
1089#define concat_inserted (G1.next_token__concat_inserted)
1090#define save_tclass (G1.next_token__save_tclass)
1091#define save_info (G1.next_token__save_info)
1092
1093 char *p;
1094 const char *tl;
1095 const uint32_t *ti;
1096 uint32_t tc, last_token_class;
1097
1098 last_token_class = t_tclass;
1099
1100 debug_printf_parse("%s() expected(%x):", __func__, expected);
1101 debug_parse_print_tc(expected);
1102 debug_printf_parse("\n");
1103
 /* rollback_token() was called: hand out the current token again */
1104 if (t_rollback) {
1105 debug_printf_parse("%s: using rolled-back token\n", __func__);
1106 t_rollback = FALSE;
 /* the previous call returned a synthesized concatenation operator;
  * now deliver the real token that was saved at that time */
1107 } else if (concat_inserted) {
1108 debug_printf_parse("%s: using concat-inserted token\n", __func__);
1109 concat_inserted = FALSE;
1110 t_tclass = save_tclass;
1111 t_info = save_info;
1112 } else {
1113 p = g_pos;
 /* undo the NUL that terminated the previous variable name */
1114 if (g_saved_ch != '\0') {
1115 *p = g_saved_ch;
1116 g_saved_ch = '\0';
1117 }
1118 readnext:
1119 p = skip_spaces(p);
1120 g_lineno = t_lineno;
 /* a comment runs up to, but not including, the end of the line */
1121 if (*p == '#')
1122 while (*p != '\n' && *p != '\0')
1123 p++;
1124
1125 if (*p == '\0') {
1126 tc = TC_EOF;
1127 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1128 } else if (*p == '\"') {
1129
 /* string literal: decode escapes in place; it must end on the same line */
1130 char *s = t_string = ++p;
1131 while (*p != '\"') {
1132 char *pp;
1133 if (*p == '\0' || *p == '\n')
1134 syntax_error(EMSG_UNEXP_EOS);
1135 pp = p;
1136 *s++ = nextchar(&pp);
1137 p = pp;
1138 }
1139 p++;
1140 *s = '\0';
1141 tc = TC_STRING;
1142 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1143 } else if ((expected & TC_REGEXP) && *p == '/') {
1144
 /* regex literal, copied over itself up to the closing '/'.
  * After a backslash: a recognised escape is replaced by its character
  * (an escaped backslash is kept as two backslashes); an unrecognised
  * one is copied through together with its backslash. */
1145 char *s = t_string = ++p;
1146 while (*p != '/') {
1147 if (*p == '\0' || *p == '\n')
1148 syntax_error(EMSG_UNEXP_EOS);
1149 *s = *p++;
1150 if (*s++ == '\\') {
1151 char *pp = p;
1152 s[-1] = bb_process_escape_sequence((const char **)&pp);
1153 if (*p == '\\')
1154 *s++ = '\\';
1155 if (pp == p)
1156 *s++ = *p++;
1157 else
1158 p = pp;
1159 }
1160 }
1161 p++;
1162 *s = '\0';
1163 tc = TC_REGEXP;
1164 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1165
1166 } else if (*p == '.' || isdigit(*p)) {
1167
 /* numeric constant; a '.' right after the number is rejected */
1168 char *pp = p;
1169 t_double = my_strtod(&pp);
1170 p = pp;
1171 if (*p == '.')
1172 syntax_error(EMSG_UNEXP_TOKEN);
1173 tc = TC_NUMBER;
1174 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1175 } else {
1176 char *end_of_name;
1177
1178 if (*p == '\n')
1179 t_lineno++;
1180
1181
 /* search the fixed-token table: tc tracks the class of the current
  * group, ti the info word of the current entry */
1182 tl = tokenlist;
1183 tc = 0x00000001;
1184 ti = tokeninfo;
1185 while (*tl) {
1186 int l = (unsigned char) *tl++;
1187 if (l == (unsigned char) NTCC) {
1188 tc <<= 1;
1189 continue;
1190 }
1191
1192
1193
1194
 /* Only expected classes are tried, plus keywords and newline, which
  * are always recognised. A keyword must not be followed by an
  * identifier character. */
1195 if ((tc & (expected | TS_WORD | TC_NEWLINE))
1196 && strncmp(p, tl, l) == 0
1197 && !((tc & TS_WORD) && isalnum_(p[l]))
1198 ) {
1199
1200 t_info = *ti;
1201 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1202 p += l;
1203 goto token_found;
1204 }
1205 ti++;
1206 tl += l;
1207 }
1208
1209
1210
 /* not a fixed token: it has to be an identifier */
1211 if (!isalnum_(*p))
1212 syntax_error(EMSG_UNEXP_TOKEN);
1213
1214 t_string = p;
1215 while (isalnum_(*p))
1216 p++;
1217 end_of_name = p;
1218
 /* Whitespace between the name and what follows is skipped after a
  * function-declaration keyword, and before '[' when an array is
  * acceptable; otherwise the next character must follow directly. */
1219 if (last_token_class == TC_FUNCDECL)
1220
1221 p = skip_spaces(p);
1222 else if (expected & TC_ARRAY) {
1223
1224 char *s = skip_spaces(p);
1225 if (*s == '[')
1226 p = s;
1227 }
1228
1229
1230
1231
1232
1233
1234
1235
 /* classify by the following character; '(' and '[' are consumed */
1236 if (*p == '(') {
1237 p++;
1238 tc = TC_FUNCTION;
1239 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1240 } else if (*p == '[') {
1241 p++;
1242 tc = TC_ARRAY;
1243 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1244 } else {
1245 tc = TC_VARIABLE;
1246 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1247 if (end_of_name == p) {
1248
1249
1250
1251
 /* the terminating NUL written below lands on the first
  * character of the next token: remember that character so
  * the next call can put it back */
1252 g_saved_ch = *end_of_name;
1253
1254
1255
1256
1257 }
1258 }
1259 *end_of_name = '\0';
1260 }
1261 token_found:
1262 g_pos = p;
1263
1264
 /* a newline directly after a TS_NOTERM token is ignored */
1265 if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
1266 goto readnext;
1267
1268
 /* Implicit concatenation: two adjacent operands with a binary operator
  * acceptable in between. Return a synthesized concat operator now and
  * keep the real token for the next call. */
1269 debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
1270 (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
1271 !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
1272 if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
1273 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN)
1274 ) {
1275 concat_inserted = TRUE;
1276 save_tclass = tc;
1277 save_info = t_info;
1278 tc = TC_BINOPX;
1279 t_info = OC_CONCAT | SS | P(35);
1280 }
1281
1282 t_tclass = tc;
1283 debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
1284 }
1285
 /* reject anything the caller did not ask for; the message depends on
  * whether the previous token ended a line or the input */
1286 if (!(t_tclass & expected)) {
1287 syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
1288 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1289 }
1290
1291 debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1292 debug_parse_print_tc(t_tclass);
1293 debug_printf_parse("\n");
1294
1295 return t_tclass;
1296#undef concat_inserted
1297#undef save_tclass
1298#undef save_info
1299}
1300
1301static ALWAYS_INLINE void rollback_token(void)
1302{
1303 t_rollback = TRUE;
1304}
1305
1306static node *new_node(uint32_t info)
1307{
1308 node *n;
1309
1310 n = xzalloc(sizeof(node));
1311 n->info = info;
1312 n->lineno = g_lineno;
1313 return n;
1314}
1315
1316static void mk_re_node(const char *s, node *n, regex_t *re)
1317{
1318 n->info = TI_REGEXP;
1319 n->l.re = re;
1320 n->r.ire = re + 1;
1321 xregcomp(re, s, REG_EXTENDED);
1322 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1323}
1324
1325static node *parse_expr(uint32_t);
1326
1327static node *parse_lrparen_list(void)
1328{
1329 next_token(TC_LPAREN);
1330 return parse_expr(TC_RPAREN);
1331}
1332
1333
1334
/*
 * Parse an expression, reading tokens with next_token() until a token
 * whose class is in term_tc is seen.
 *
 * The tree is built incrementally below a stack-allocated sentinel node
 * (sn); the result is whatever ended up in sn.r.n, which is NULL when no
 * token preceded the terminator.
 *
 * Working variables:
 *  cn          - the node most recently added ("current node")
 *  vn          - scratch pointer used while linking a new node in
 *  glptr       - set after a TC_GETLINE operand, so that a following
 *                TI_LESS token is handled as "getline < file"
 *  expected_tc - token classes passed to the next next_token() call
 *
 * While parsing, a node's a.n field is used as an up-link to the node
 * it hangs from (see the "cn->a.n = vn" assignments).
 */
static node *parse_expr(uint32_t term_tc)
{
	node sn;
	node *cn = &sn;
	node *vn, *glptr;
	uint32_t tc, expected_tc;
	var *v;

	debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
	debug_parse_print_tc(term_tc);
	debug_printf_parse("\n");

	/* sentinel: its info is PRIMASK, its up-link is NULL */
	sn.info = PRIMASK;
	sn.r.n = sn.a.n = glptr = NULL;
	expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;

	while (!((tc = next_token(expected_tc)) & term_tc)) {

		if (glptr && (t_info == TI_LESS)) {
			/* "getline <file": hang a new OC_CONCAT node off the getline node's l.n */
			debug_printf_parse("%s: input redir\n", __func__);
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			expected_tc = TS_OPERAND | TS_UOPPRE;
			glptr = NULL;
			continue;
		}
		if (tc & (TS_BINOP | TC_UOPPOST)) {
			debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
			/* Binary or postfix operator: walk up the a.n links from the
			 * current node while the operator's PRIMASK bits compare greater
			 * than the PRIMASK2 bits of the node above (or while a TI_COLON
			 * meets another TI_COLON), to find where the operator attaches.
			 */
			vn = cn;
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			    || ((t_info == vn->info) && t_info == TI_COLON)
			) {
				vn = vn->a.n;
				/* climbed up to the sentinel: nothing left to attach to */
				if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
			}
			if (t_info == TI_TERNARY)
				/* NOTE(review): adjusts the priority bits of the stored '?' node - confirm intent */
				t_info += P(6);
			/* splice the operator node in between vn and the node above it */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TS_BINOP) {
				/* binary: vn becomes the left operand, right operand follows */
				cn->l.n = vn;
				expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
				if (t_info == TI_PGETLINE) {
					/* "cmd | getline": the getline keyword must follow */
					next_token(TC_GETLINE);
					/* clear the PRIMASK bits of the stored node */
					cn->info &= ~PRIMASK;
					expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
				}
			} else {
				/* postfix: vn becomes the (only) operand, stored in r.n */
				cn->r.n = vn;
				expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
			}
			vn->a.n = cn;
			continue;
		}

		debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
		/* Any other token (operand or prefix operator):
		 * add a new node as the right child of the current node.
		 */
		vn = cn;
		cn = vn->r.n = new_node(t_info);
		cn->a.n = vn;

		expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
		if (t_info == TI_PREINC || t_info == TI_PREDEC)
			expected_tc = TS_LVALUE | TC_UOPPRE1;

		/* prefix operators need no further per-token work */
		if (!(tc & (TS_OPERAND | TC_REGEXP)))
			continue;

		debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
		expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;

		/* operand-specific setup of the freshly created node */
		switch (tc) {
		case TC_VARIABLE:
		case TC_ARRAY:
			debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
			cn->info = OC_VAR;
			/* a name found in ahash becomes OC_FNARG and is referenced by index */
			v = hash_search(ahash, t_string);
			if (v != NULL) {
				cn->info = OC_FNARG;
				cn->l.aidx = v->x.aidx;
			} else {
				cn->l.v = newvar(t_string);
			}
			if (tc & TC_ARRAY) {
				/* subscript expression goes to r.n, parsed up to TC_ARRTERM */
				cn->info |= xS;
				cn->r.n = parse_expr(TC_ARRTERM);
			}
			break;

		case TC_NUMBER:
		case TC_STRING:
			debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
			/* constants are stored as a freshly allocated var attached to the node */
			cn->info = OC_VAR;
			v = cn->l.v = xzalloc(sizeof(var));
			if (tc & TC_NUMBER)
				setvar_i(v, t_double);
			else {
				setvar_s(v, t_string);
				/* a postfix operator is not accepted right after a string constant */
				expected_tc &= ~TC_UOPPOST;
			}
			break;

		case TC_REGEXP:
			debug_printf_parse("%s: TC_REGEXP\n", __func__);
			/* room for two regex_t: mk_re_node() compiles two variants */
			mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
			break;

		case TC_FUNCTION:
			debug_printf_parse("%s: TC_FUNCTION\n", __func__);
			cn->info = OC_FUNC;
			cn->r.f = newfunc(t_string);
			/* argument list, parsed up to ')' */
			cn->l.n = parse_expr(TC_RPAREN);
			break;

		case TC_LPAREN:
			debug_printf_parse("%s: TC_LPAREN\n", __func__);
			/* parenthesized subexpression replaces the node created above */
			cn = vn->r.n = parse_expr(TC_RPAREN);
			if (!cn)
				syntax_error("Empty sequence");
			cn->a.n = vn;
			break;

		case TC_GETLINE:
			debug_printf_parse("%s: TC_GETLINE\n", __func__);
			/* remember the node: a following TI_LESS is an input redirection */
			glptr = cn;
			expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
			break;

		case TC_BUILTIN:
			debug_printf_parse("%s: TC_BUILTIN\n", __func__);
			cn->l.n = parse_lrparen_list();
			break;

		case TC_LENGTH:
			debug_printf_parse("%s: TC_LENGTH\n", __func__);
			/* the '(' after this token is optional: look at the next token... */
			tc = next_token(TC_LPAREN /* length(...) */
				| TC_SEMICOL
				| TC_NEWLINE
				| TC_RBRACE
				| TC_BINOPX
				| TC_COMMA
			);
			/* ...and push it back unless it was '(' */
			if (tc != TC_LPAREN)
				rollback_token();
			else {
				/* argument, parsed up to ')' */
				cn->l.n = parse_expr(TC_RPAREN);
			}
			break;
		}
	}

	debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
	return sn.r.n;
}
1504
1505
1506static node *chain_node(uint32_t info)
1507{
1508 node *n;
1509
1510 if (!seq->first)
1511 seq->first = seq->last = new_node(0);
1512
1513 if (seq->programname != g_progname) {
1514 seq->programname = g_progname;
1515 n = chain_node(OC_NEWSOURCE);
1516 n->l.new_progname = g_progname;
1517 }
1518
1519 n = seq->last;
1520 n->info = info;
1521 seq->last = n->a.n = new_node(OC_DONE);
1522
1523 return n;
1524}
1525
1526static void chain_expr(uint32_t info)
1527{
1528 node *n;
1529
1530 n = chain_node(info);
1531
1532 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1533 if ((info & OF_REQUIRED) && !n->l.n)
1534 syntax_error(EMSG_TOO_FEW_ARGS);
1535
1536 if (t_tclass & TC_RBRACE)
1537 rollback_token();
1538}
1539
1540static void chain_group(void);
1541
1542static node *chain_loop(node *nn)
1543{
1544 node *n, *n2, *save_brk, *save_cont;
1545
1546 save_brk = break_ptr;
1547 save_cont = continue_ptr;
1548
1549 n = chain_node(OC_BR | Vx);
1550 continue_ptr = new_node(OC_EXEC);
1551 break_ptr = new_node(OC_EXEC);
1552 chain_group();
1553 n2 = chain_node(OC_EXEC | Vx);
1554 n2->l.n = nn;
1555 n2->a.n = n;
1556 continue_ptr->a.n = n2;
1557 break_ptr->a.n = n->r.n = seq->last;
1558
1559 continue_ptr = save_cont;
1560 break_ptr = save_brk;
1561
1562 return n;
1563}
1564
1565static void chain_until_rbrace(void)
1566{
1567 uint32_t tc;
1568 while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1569 debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1570 if (tc == TC_NEWLINE)
1571 continue;
1572 rollback_token();
1573 chain_group();
1574 }
1575 debug_printf_parse("%s: TC_RBRACE\n", __func__);
1576}
1577
1578
1579static void chain_group(void)
1580{
1581 uint32_t tc;
1582 node *n, *n2, *n3;
1583
1584 do {
1585 tc = next_token(TS_GRPSEQ);
1586 } while (tc == TC_NEWLINE);
1587
1588 if (tc == TC_LBRACE) {
1589 debug_printf_parse("%s: TC_LBRACE\n", __func__);
1590 chain_until_rbrace();
1591 return;
1592 }
1593 if (tc & (TS_OPSEQ | TC_SEMICOL)) {
1594 debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
1595 rollback_token();
1596 chain_expr(OC_EXEC | Vx);
1597 return;
1598 }
1599
1600
1601 debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
1602 switch (t_info & OPCLSMASK) {
1603 case ST_IF:
1604 debug_printf_parse("%s: ST_IF\n", __func__);
1605 n = chain_node(OC_BR | Vx);
1606 n->l.n = parse_lrparen_list();
1607 chain_group();
1608 n2 = chain_node(OC_EXEC);
1609 n->r.n = seq->last;
1610 if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
1611 chain_group();
1612 n2->a.n = seq->last;
1613 } else {
1614 rollback_token();
1615 }
1616 break;
1617
1618 case ST_WHILE:
1619 debug_printf_parse("%s: ST_WHILE\n", __func__);
1620 n2 = parse_lrparen_list();
1621 n = chain_loop(NULL);
1622 n->l.n = n2;
1623 break;
1624
1625 case ST_DO:
1626 debug_printf_parse("%s: ST_DO\n", __func__);
1627 n2 = chain_node(OC_EXEC);
1628 n = chain_loop(NULL);
1629 n2->a.n = n->a.n;
1630 next_token(TC_WHILE);
1631 n->l.n = parse_lrparen_list();
1632 break;
1633
1634 case ST_FOR:
1635 debug_printf_parse("%s: ST_FOR\n", __func__);
1636 next_token(TC_LPAREN);
1637 n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
1638 if (t_tclass & TC_RPAREN) {
1639 if (!n2 || n2->info != TI_IN)
1640 syntax_error(EMSG_UNEXP_TOKEN);
1641 n = chain_node(OC_WALKINIT | VV);
1642 n->l.n = n2->l.n;
1643 n->r.n = n2->r.n;
1644 n = chain_loop(NULL);
1645 n->info = OC_WALKNEXT | Vx;
1646 n->l.n = n2->l.n;
1647 } else {
1648 n = chain_node(OC_EXEC | Vx);
1649 n->l.n = n2;
1650 n2 = parse_expr(TC_SEMICOL);
1651 n3 = parse_expr(TC_RPAREN);
1652 n = chain_loop(n3);
1653 n->l.n = n2;
1654 if (!n2)
1655 n->info = OC_EXEC;
1656 }
1657 break;
1658
1659 case OC_PRINT:
1660 case OC_PRINTF:
1661 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1662 n = chain_node(t_info);
1663 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
1664 if (t_tclass & TC_OUTRDR) {
1665 n->info |= t_info;
1666 n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1667 }
1668 if (t_tclass & TC_RBRACE)
1669 rollback_token();
1670 break;
1671
1672 case OC_BREAK:
1673 debug_printf_parse("%s: OC_BREAK\n", __func__);
1674 n = chain_node(OC_EXEC);
1675 if (!break_ptr)
1676 syntax_error("'break' not in a loop");
1677 n->a.n = break_ptr;
1678 chain_expr(t_info);
1679 break;
1680
1681 case OC_CONTINUE:
1682 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1683 n = chain_node(OC_EXEC);
1684 if (!continue_ptr)
1685 syntax_error("'continue' not in a loop");
1686 n->a.n = continue_ptr;
1687 chain_expr(t_info);
1688 break;
1689
1690
1691 default:
1692 debug_printf_parse("%s: default\n", __func__);
1693 chain_expr(t_info);
1694 }
1695}
1696
1697static void parse_program(char *p)
1698{
1699 debug_printf_parse("%s()\n", __func__);
1700
1701 g_pos = p;
1702 t_lineno = 1;
1703 for (;;) {
1704 uint32_t tclass;
1705
1706 tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1707 | TC_EOF | TC_NEWLINE );
1708 got_tok:
1709 if (tclass == TC_EOF) {
1710 debug_printf_parse("%s: TC_EOF\n", __func__);
1711 break;
1712 }
1713 if (tclass == TC_NEWLINE) {
1714 debug_printf_parse("%s: TC_NEWLINE\n", __func__);
1715 continue;
1716 }
1717 if (tclass == TC_BEGIN) {
1718 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1719 seq = &beginseq;
1720
1721 next_token(TC_LBRACE);
1722 chain_until_rbrace();
1723 goto next_tok;
1724 }
1725 if (tclass == TC_END) {
1726 debug_printf_parse("%s: TC_END\n", __func__);
1727 seq = &endseq;
1728
1729 next_token(TC_LBRACE);
1730 chain_until_rbrace();
1731 goto next_tok;
1732 }
1733 if (tclass == TC_FUNCDECL) {
1734 func *f;
1735
1736 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1737 next_token(TC_FUNCTION);
1738 f = newfunc(t_string);
1739 if (f->defined)
1740 syntax_error("Duplicate function");
1741 f->defined = 1;
1742
1743
1744
1745 for (;;) {
1746 var *v;
1747 if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
1748 if (f->nargs == 0)
1749 break;
1750
1751 syntax_error(EMSG_UNEXP_TOKEN);
1752 }
1753 v = findvar(ahash, t_string);
1754 v->x.aidx = f->nargs++;
1755
1756 if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
1757 break;
1758
1759 }
1760 seq = &f->body;
1761
1762 while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
1763 continue;
1764 chain_until_rbrace();
1765 hash_clear(ahash);
1766 goto next_tok;
1767 }
1768 seq = &mainseq;
1769 if (tclass & TS_OPSEQ) {
1770 node *cn;
1771
1772 debug_printf_parse("%s: TS_OPSEQ\n", __func__);
1773 rollback_token();
1774 cn = chain_node(OC_TEST);
1775 cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
1776 if (t_tclass == TC_LBRACE) {
1777 debug_printf_parse("%s: TC_LBRACE\n", __func__);
1778 chain_until_rbrace();
1779 } else {
1780
1781 debug_printf_parse("%s: !TC_LBRACE\n", __func__);
1782 chain_node(OC_PRINT);
1783 }
1784 cn->r.n = mainseq.last;
1785 goto next_tok;
1786 }
1787
1788 debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
1789 chain_until_rbrace();
1790 next_tok:
1791
1792 tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
1793 | TC_EOF | TC_NEWLINE | TC_SEMICOL);
1794
1795
1796
1797
1798
1799
1800 if (tclass != TC_SEMICOL)
1801 goto got_tok;
1802
1803 }
1804}
1805
1806
1807
1808
1809static var *nvalloc(int sz)
1810{
1811 return xzalloc(sz * sizeof(var));
1812}
1813
1814static void nvfree(var *v, int sz)
1815{
1816 var *p = v;
1817
1818 while (--sz >= 0) {
1819 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1820 clear_array(iamarray(p));
1821 free(p->x.array->items);
1822 free(p->x.array);
1823 }
1824 if (p->type & VF_WALK) {
1825 walker_list *n;
1826 walker_list *w = p->x.walker;
1827 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1828 p->x.walker = NULL;
1829 while (w) {
1830 n = w->prev;
1831 debug_printf_walker(" free(%p)\n", w);
1832 free(w);
1833 w = n;
1834 }
1835 }
1836 clrvar(p);
1837 p++;
1838 }
1839
1840 free(v);
1841}
1842
1843static node *mk_splitter(const char *s, tsplitter *spl)
1844{
1845 regex_t *re, *ire;
1846 node *n;
1847
1848 re = &spl->re[0];
1849 ire = &spl->re[1];
1850 n = &spl->n;
1851 if (n->info == TI_REGEXP) {
1852 regfree(re);
1853 regfree(ire);
1854 }
1855 if (s[0] && s[1]) {
1856 mk_re_node(s, n, re);
1857 } else {
1858 n->info = (uint32_t) s[0];
1859 }
1860
1861 return n;
1862}
1863
1864static var *evaluate(node *, var *);
1865
1866
1867
1868
1869
1870static regex_t *as_regex(node *op, regex_t *preg)
1871{
1872 int cflags;
1873 const char *s;
1874
1875 if (op->info == TI_REGEXP) {
1876 return icase ? op->r.ire : op->l.re;
1877 }
1878
1879
1880#define TMPVAR (&G.as_regex__tmpvar)
1881
1882
1883
1884
1885 s = getvar_s(evaluate(op, TMPVAR));
1886
1887 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1888
1889
1890
1891
1892
1893 if (regcomp(preg, s, cflags)) {
1894 cflags &= ~REG_EXTENDED;
1895 xregcomp(preg, s, cflags);
1896 }
1897
1898#undef TMPVAR
1899 return preg;
1900}
1901
1902
1903
1904
1905
/*
 * Make sure buffer b can hold more than n bytes.
 * When b is NULL or n is not below *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated; otherwise b is returned
 * unchanged. *size is not read when b is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n >> 1) + 80;
	return xrealloc(b, *size);
}
1914
1915
1916static void fsrealloc(int size)
1917{
1918 int i, newsize;
1919
1920 if (size >= maxfields) {
1921
1922 if (size > 0xffffff)
1923 bb_die_memory_exhausted();
1924
1925 i = maxfields;
1926 maxfields = size + 16;
1927
1928 newsize = maxfields * sizeof(Fields[0]);
1929 debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1930 Fields = xrealloc(Fields, newsize);
1931 debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1932
1933
1934 for (; i < maxfields; i++) {
1935 Fields[i].type = VF_SPECIAL;
1936 Fields[i].string = NULL;
1937 }
1938 }
1939
1940 for (i = size; i < nfields; i++) {
1941 clrvar(Fields + i);
1942 }
1943 nfields = size;
1944}
1945
/*
 * Like regexec(preg, s, 1, pmatch, 0), but when the first match found
 * ends at offset 0 (i.e. an empty match at the very start of s), search
 * on from s+1, s+2, ... for the first match that does not end at its
 * own start offset.
 *
 * Returns 0 with pmatch[0] holding offsets relative to s on success,
 * REG_NOMATCH when no such match exists before the end of s, or the
 * regexec() error code of the initial attempt.
 */
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
{
	size_t ofs;
	int r = regexec(preg, s, 1, pmatch, 0);

	if (r != 0 || pmatch[0].rm_eo != 0)
		return r;

	/* An empty string has no further positions to try
	 * (and s[1] must not be looked at).
	 */
	if (s[0] == '\0')
		return REG_NOMATCH;

	for (ofs = 1; s[ofs] != '\0'; ofs++) {
		/* pmatch is meaningful only when this attempt itself matched:
		 * do not rely on stale offsets from the previous attempt.
		 */
		if (regexec(preg, s + ofs, 1, pmatch, 0) != 0)
			continue;
		if (pmatch[0].rm_eo == 0)
			continue;
		pmatch[0].rm_so += ofs;
		pmatch[0].rm_eo += ofs;
		return 0;
	}
	return REG_NOMATCH;
}
1968
1969static int awk_split(const char *s, node *spl, char **slist)
1970{
1971 int n;
1972 char c[4];
1973 char *s1;
1974
1975
1976 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1977 strcpy(s1, s);
1978
1979 c[0] = c[1] = (char)spl->info;
1980 c[2] = c[3] = '\0';
1981 if (*getvar_s(intvar[RS]) == '\0')
1982 c[2] = '\n';
1983
1984 n = 0;
1985 if (spl->info == TI_REGEXP) {
1986 if (!*s)
1987 return n;
1988 n++;
1989 do {
1990 int l;
1991 regmatch_t pmatch[1];
1992
1993 l = strcspn(s, c+2);
1994 if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
1995 && pmatch[0].rm_so <= l
1996 ) {
1997
1998 l = pmatch[0].rm_so;
1999 n++;
2000 } else {
2001 pmatch[0].rm_eo = l;
2002 if (s[l])
2003 pmatch[0].rm_eo++;
2004 }
2005 s1 = mempcpy(s1, s, l);
2006 *s1++ = '\0';
2007 s += pmatch[0].rm_eo;
2008 } while (*s);
2009
2010
2011
2012
2013 *s1 = '\0';
2014
2015 return n;
2016 }
2017 if (c[0] == '\0') {
2018 while (*s) {
2019 *s1++ = *s++;
2020 *s1++ = '\0';
2021 n++;
2022 }
2023 return n;
2024 }
2025 if (c[0] != ' ') {
2026 if (icase) {
2027 c[0] = toupper(c[0]);
2028 c[1] = tolower(c[1]);
2029 }
2030 if (*s1)
2031 n++;
2032 while ((s1 = strpbrk(s1, c)) != NULL) {
2033 *s1++ = '\0';
2034 n++;
2035 }
2036 return n;
2037 }
2038
2039 while (*s) {
2040 s = skip_whitespace(s);
2041 if (!*s)
2042 break;
2043 n++;
2044 while (*s && !isspace(*s))
2045 *s1++ = *s++;
2046 *s1++ = '\0';
2047 }
2048 return n;
2049}
2050
2051static void split_f0(void)
2052{
2053
2054#define fstrings (G.split_f0__fstrings)
2055
2056 int i, n;
2057 char *s;
2058
2059 if (is_f0_split)
2060 return;
2061
2062 is_f0_split = TRUE;
2063 free(fstrings);
2064 fsrealloc(0);
2065 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
2066 fsrealloc(n);
2067 s = fstrings;
2068 for (i = 0; i < n; i++) {
2069 Fields[i].string = nextword(&s);
2070 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
2071 }
2072
2073
2074 clrvar(intvar[NF]);
2075 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2076 intvar[NF]->number = nfields;
2077#undef fstrings
2078}
2079
2080
2081static void handle_special(var *v)
2082{
2083 int n;
2084 char *b;
2085 const char *sep, *s;
2086 int sl, l, len, i, bsize;
2087
2088 if (!(v->type & VF_SPECIAL))
2089 return;
2090
2091 if (v == intvar[NF]) {
2092 n = (int)getvar_i(v);
2093 if (n < 0)
2094 syntax_error("NF set to negative value");
2095 fsrealloc(n);
2096
2097
2098 sep = getvar_s(intvar[OFS]);
2099 sl = strlen(sep);
2100 b = NULL;
2101 len = 0;
2102 for (i = 0; i < n; i++) {
2103 s = getvar_s(&Fields[i]);
2104 l = strlen(s);
2105 if (b) {
2106 memcpy(b+len, sep, sl);
2107 len += sl;
2108 }
2109 b = qrealloc(b, len+l+sl, &bsize);
2110 memcpy(b+len, s, l);
2111 len += l;
2112 }
2113 if (b)
2114 b[len] = '\0';
2115 setvar_p(intvar[F0], b);
2116 is_f0_split = TRUE;
2117
2118 } else if (v == intvar[F0]) {
2119 is_f0_split = FALSE;
2120
2121 } else if (v == intvar[FS]) {
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132 split_f0();
2133
2134 mk_splitter(getvar_s(v), &fsplitter);
2135 } else if (v == intvar[RS]) {
2136 mk_splitter(getvar_s(v), &rsplitter);
2137 } else if (v == intvar[IGNORECASE]) {
2138 icase = istrue(v);
2139 } else {
2140 n = getvar_i(intvar[NF]);
2141 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
2142
2143 }
2144}
2145
2146
2147static node *nextarg(node **pn)
2148{
2149 node *n;
2150
2151 n = *pn;
2152 if (n && n->info == TI_COMMA) {
2153 *pn = n->r.n;
2154 n = n->l.n;
2155 } else {
2156 *pn = NULL;
2157 }
2158 return n;
2159}
2160
2161static void hashwalk_init(var *v, xhash *array)
2162{
2163 hash_item *hi;
2164 unsigned i;
2165 walker_list *w;
2166 walker_list *prev_walker;
2167
2168 if (v->type & VF_WALK) {
2169 prev_walker = v->x.walker;
2170 } else {
2171 v->type |= VF_WALK;
2172 prev_walker = NULL;
2173 }
2174 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
2175
2176 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1);
2177 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
2178 w->cur = w->end = w->wbuf;
2179 w->prev = prev_walker;
2180 for (i = 0; i < array->csize; i++) {
2181 hi = array->items[i];
2182 while (hi) {
2183 w->end = stpcpy(w->end, hi->name) + 1;
2184 hi = hi->next;
2185 }
2186 }
2187}
2188
2189static int hashwalk_next(var *v)
2190{
2191 walker_list *w = v->x.walker;
2192
2193 if (w->cur >= w->end) {
2194 walker_list *prev_walker = w->prev;
2195
2196 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
2197 free(w);
2198 v->x.walker = prev_walker;
2199 return FALSE;
2200 }
2201
2202 setvar_s(v, nextword(&w->cur));
2203 return TRUE;
2204}
2205
2206
2207static int ptest(node *pattern)
2208{
2209
2210
2211
2212
2213 return istrue(evaluate(pattern, &G.ptest__tmpvar));
2214}
2215
2216
/*
 * Read the next record from stream rsm into variable v, using the
 * global record splitter (rsplitter). The matched terminator text is
 * stored in RT.
 *
 * Returns 1 when a record was stored, 0 when no data is left, and -1
 * when reading failed (ERRNO is set to errno in that case).
 *
 * Buffer bookkeeping, saved in rsm between calls:
 *  m (rsm->buffer) - start of the allocated buffer
 *  a (rsm->adv)    - offset in m of the first unconsumed byte
 *  p (rsm->pos)    - number of unconsumed bytes starting at m+a
 *  size            - allocated size, as maintained by qrealloc()
 */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[1];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	debug_printf_eval("entered %s()\n", __func__);

	/* data is read with safe_read() on the descriptor underlying rsm->F */
	fd = fileno(rsm->F);
	m = rsm->buffer;
	a = rsm->adv;
	p = rsm->pos;
	size = rsm->size;
	c = (char) rsplitter.n.info;
	rp = 0;

	if (!m)
		m = qrealloc(m, 256, &size);

	do {
		b = m + a;
		/* so..eo will delimit the terminator; default: none, record is all p bytes */
		so = eo = p;
		r = 1;
		if (p > 0) {
			if (rsplitter.n.info == TI_REGEXP) {
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* a match reaching the end of the data is not accepted
					 * yet: more input is read first
					 */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* single-char terminator: search only the bytes added since
				 * offset pp; a NUL byte inside the data ends the record too
				 */
				s = strchr(b+pp, c);
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* splitter char is NUL: skip leading newlines, then the
				 * record ends at the first run of two or more newlines
				 */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					/* as above: a run touching the end of data needs more input */
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* no complete record yet: move the pending bytes (and their NUL)
		 * to the start of the buffer
		 */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		/* ensure room for more data, then append what can be read */
		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* negative result from safe_read(): read error */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp); /* loop again only if new bytes arrived */

	if (p == 0) {
		/* nothing to return: r becomes 0 (no more data) or -1 (after a read error) */
		r--;
	} else {
		/* temporarily NUL-terminate the record, then the terminator, to copy them out */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* remember the buffer state; eo bytes were consumed by this call */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	debug_printf_eval("returning from %s(): %d\n", __func__, r);

	return r;
}
2317
2318
2319#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2320# define awk_printf(a, b) awk_printf(a)
2321#endif
2322static char *awk_printf(node *n, size_t *len)
2323{
2324 char *b;
2325 char *fmt, *f;
2326 size_t i;
2327
2328
2329#define TMPVAR (&G.awk_printf__tmpvar)
2330
2331
2332
2333
2334 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
2335
2336
2337
2338
2339 b = NULL;
2340 i = 0;
2341 while (1) {
2342 char *s;
2343 char c;
2344 char sv;
2345 var *arg;
2346 size_t slen;
2347
2348
2349 s = f;
2350 while (1) {
2351 c = *f;
2352 if (!c)
2353 goto nul;
2354 f++;
2355 if (c == '%')
2356 break;
2357 }
2358
2359 c = *f;
2360 if (!c)
2361 goto nul;
2362 if (c == '%') {
2363 slen = f - s;
2364 s = xstrndup(s, slen);
2365 f++;
2366 goto append;
2367 }
2368 while (1) {
2369 if (isalpha(c))
2370 break;
2371 if (c == '*')
2372 syntax_error("%*x formats are not supported");
2373 c = *++f;
2374 if (!c) {
2375
2376 nul:
2377 slen = f - s;
2378 goto tail;
2379 }
2380 }
2381
2382
2383 arg = evaluate(nextarg(&n), TMPVAR);
2384
2385
2386
2387
2388 sv = *++f;
2389 *f = '\0';
2390 if (c == 'c') {
2391 char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
2392 char *r = xasprintf(s, cc ? cc : '^' );
2393 slen = strlen(r);
2394 if (cc == '\0')
2395 sprintf(r, s, cc);
2396 s = r;
2397 } else {
2398 if (c == 's') {
2399 s = xasprintf(s, getvar_s(arg));
2400 } else {
2401 double d = getvar_i(arg);
2402 if (strchr("diouxX", c)) {
2403
2404 s = xasprintf(s, (int)d);
2405 } else if (strchr("eEfFgGaA", c)) {
2406 s = xasprintf(s, d);
2407 } else {
2408
2409 syntax_error(EMSG_INV_FMT);
2410 }
2411 }
2412 slen = strlen(s);
2413 }
2414 *f = sv;
2415 append:
2416 if (i == 0) {
2417 b = s;
2418 i = slen;
2419 continue;
2420 }
2421 tail:
2422 b = xrealloc(b, i + slen + 1);
2423 strcpy(b + i, s);
2424 i += slen;
2425 if (!c)
2426 break;
2427 free(s);
2428 }
2429
2430 free(fmt);
2431
2432#undef TMPVAR
2433
2434#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
2435 if (len)
2436 *len = i;
2437#endif
2438 return b;
2439}
2440
2441
2442
2443
2444
2445
2446
2447
/*
 * Regexp substitution.
 *
 *  rn     - node providing the regexp (see as_regex())
 *  repl   - replacement text; '&' stands for the matched text, and with
 *           subexp != 0 a digit 0-9 preceded by a backslash stands for
 *           the corresponding pmatch[] entry
 *  nm     - matches numbered below nm are copied unchanged and the
 *           search stops after match number nm; with nm == 0 every
 *           match is replaced
 *  src    - variable holding the input string (F0 when NULL)
 *  dest   - variable receiving the result (F0 when NULL)
 *
 * Returns the number of matches that were processed.
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL;
	residx = 0;	/* number of bytes written to resbuf so far */
	match_no = 0;
	regexec_flags = 0;
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]);	/* unprocessed rest of the input */
	replen = strlen(repl);
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		/* copy everything up to the end of the match; the extra replen
		 * bytes leave room for the literal replacement text
		 */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs;

			/* this match is to be replaced: rewind over the matched text */
			residx -= (eo - so);
			nbs = 0;	/* length of the backslash run just before the current char */
			for (s = repl; *s; s++) {
				/* every replacement char is copied first, fixed up below */
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* drop the char just copied and (nbs+1)/2 of the backslashes before it */
					residx -= ((nbs + 3) >> 1);
					j = 0;
					if (c != '&') {
						j = c - '0';
						/* shifts the parity: a digit needs an odd backslash run to be special */
						nbs++;
					}
					if (nbs % 2) {
						/* odd count: the char is literal */
						resbuf[residx++] = c;
					} else {
						/* even count: insert the text of match/subexpression j */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		/* later searches start inside the string, not at a line start */
		regexec_flags = REG_NOTBOL;
		sp += eo;
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match: copy one input char and step over it, so that
			 * the next search starts one position further.
			 * At the end of the input the copied char is the final NUL.
			 */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret;
			sp++;
			residx++;
		}
	}

	/* append the unprocessed rest of the input */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	/* resbuf is handed over to the destination variable */
	setvar_p(dest ? dest : intvar[F0], resbuf);
	/* free the regex only if as_regex() compiled it into our local storage */
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}
2538
/*
 * Convert a date specification "YYYY MM DD HH MM SS [DST]" to seconds
 * since the epoch, as computed by mktime().
 *
 * ds: the specification; the first six numbers are required, the
 *     seventh (tm_isdst) is optional and defaults to -1.
 * Returns -1 when fewer than six numbers could be read, when the month
 * is below 1 or when the year is below 1900; otherwise the mktime()
 * result (converted to int).
 */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm then;
	int count;

	/* start from a fully defined struct, including fields sscanf does not set */
	memset(&then, 0, sizeof(then));
	then.tm_isdst = -1; /* default: let mktime() determine DST */

	/* struct tm members are int: use the matching %d conversion */
	count = sscanf(ds, "%d %d %d %d %d %d %d",
		&then.tm_year, &then.tm_mon, &then.tm_mday,
		&then.tm_hour, &then.tm_min, &then.tm_sec,
		&then.tm_isdst);

	if (count < 6
	 || (unsigned)then.tm_mon < 1
	 || (unsigned)then.tm_year < 1900
	) {
		return -1;
	}

	/* struct tm counts months from 0 and years from 1900 */
	then.tm_mon -= 1;
	then.tm_year -= 1900;

	return mktime(&then);
}
2566
2567
2568static NOINLINE var *do_match(node *an1, const char *as0)
2569{
2570 regmatch_t pmatch[1];
2571 regex_t sreg, *re;
2572 int n, start, len;
2573
2574 re = as_regex(an1, &sreg);
2575 n = regexec(re, as0, 1, pmatch, 0);
2576 if (re == &sreg)
2577 regfree(re);
2578 start = 0;
2579 len = -1;
2580 if (n == 0) {
2581 start = pmatch[0].rm_so + 1;
2582 len = pmatch[0].rm_eo - pmatch[0].rm_so;
2583 }
2584 setvar_i(newvar("RLENGTH"), len);
2585 return setvar_i(newvar("RSTART"), start);
2586}
2587
2588
/*
 * Execute a builtin function call.
 *
 *  op  - the call node: op->info selects the builtin and describes its
 *        arguments, op->l.n is the argument list
 *  res - variable receiving the result
 * Returns the variable holding the result (res, except for B_ma where
 * it is the variable returned by do_match()).
 *
 * Up to four arguments are collected:
 *  an[i] - argument node
 *  av[i] - evaluated argument (only when requested by the info bits)
 *  as[i] - its string value (only when requested by the info bits)
 * Entries that were not requested or not supplied are left unset,
 * except av[2] and av[3] which default to NULL.
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

	var *tmpvars;
	node *an[4];
	var *av[4];
	const char *as[4];
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	/* one temporary per possible argument */
	tmpvars = nvalloc(4);
#define TMPVAR0 (tmpvars)
#define TMPVAR1 (tmpvars + 1)
#define TMPVAR2 (tmpvars + 2)
#define TMPVAR3 (tmpvars + 3)
#define TMPVAR(i) (tmpvars + (i))
	isr = info = op->info;
	op = op->l.n;

	av[2] = av[3] = NULL;
	for (i = 0; i < 4 && op; i++) {
		an[i] = nextarg(&op);
		/* isr is shifted right once per argument, so these two masks test
		 * a different pair of info bits for each argument:
		 * either bit set -> evaluate it; 0x08000000 -> also fetch its string
		 */
		if (isr & 0x09000000) {
			av[i] = evaluate(an[i], TMPVAR(i));
			if (isr & 0x08000000)
				as[i] = getvar_s(av[i]);
		}
		isr >>= 1;
	}

	nargs = i;
	/* the two top bits of info hold the minimum number of arguments */
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	case B_a2:
		/* atan2(av[0], av[1]); needs libm support */
		if (ENABLE_FEATURE_AWK_LIBM)
			setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
		else
			syntax_error(EMSG_NO_MATH);
		break;

	case B_sp: {
		/* split as[0] into array av[1]; result is the number of pieces */
		char *s, *s1;

		if (nargs > 2) {
			/* explicit separator: a regexp node is used as is, anything
			 * else is evaluated and turned into a temporary splitter
			 */
			spl = (an[2]->info == TI_REGEXP) ? an[2]
				: mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s;	/* nextword() advances s: keep the start for free() */
		clear_array(iamarray(av[1]));
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	case B_ss: {
		/* substring of as[0]: av[1] is the 1-based start, av[2] the optional length */
		char *s;

		l = strlen(as[0]);
		i = getvar_i(av[1]) - 1;
		/* clamp the start offset to 0..l */
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		/* without a length argument, take the rest of the string */
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise operations on the values converted by getvar_i_int() */

	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	case B_lo:
	case B_up: {
		/* case conversion of as[0]: upper case for B_up, lower case for B_lo */
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			/* only the ASCII letters a-z / A-Z are touched: bit 0x20 is the case bit */
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	case B_ix:
		/* 1-based position of as[1] within as[0], or 0 when not found
		 * (also 0 when as[1] is empty or longer than as[0])
		 */
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll;
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* case-insensitive search: compare at every
				 * possible start offset
				 */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	case B_ti:
		/* strftime-format a time: av[1] is the timestamp (default: now),
		 * as[0] the format (default below)
		 */
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	case B_mt:
		setvar_i(res, do_mktime(as[0]));
		break;

	case B_ma:
		/* the regexp is passed as a node, the subject as a string */
		res = do_match(an[1], as[0]);
		break;

	case B_ge:
		/* substitution with subexpression references enabled; av[2] is
		 * passed as nm, av[3] is the source (F0 when absent), and the
		 * result text goes to res
		 */
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	case B_gs:
		/* replace all matches (nm == 0) in place in av[2] (F0 when absent);
		 * the result is the match count
		 */
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	case B_su:
		/* as B_gs, but only the first match (nm == 1) */
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tmpvars, 4);
#undef TMPVAR0
#undef TMPVAR1
#undef TMPVAR2
#undef TMPVAR3
#undef TMPVAR

	return res;
#undef tspl
}
2782
2783
2784
2785static int is_assignment(const char *expr)
2786{
2787 char *exprc, *val;
2788
2789 val = (char*)endofname(expr);
2790 if (val == (char*)expr || *val != '=') {
2791 return FALSE;
2792 }
2793
2794 exprc = xstrdup(expr);
2795 val = exprc + (val - expr);
2796 *val++ = '\0';
2797
2798 unescape_string_in_place(val);
2799 setvar_u(newvar(exprc), val);
2800 free(exprc);
2801 return TRUE;
2802}
2803
2804
2805static rstream *next_input_file(void)
2806{
2807#define rsm (G.next_input_file__rsm)
2808#define files_happen (G.next_input_file__files_happen)
2809
2810 const char *fname, *ind;
2811
2812 if (rsm.F)
2813 fclose(rsm.F);
2814 rsm.F = NULL;
2815 rsm.pos = rsm.adv = 0;
2816
2817 for (;;) {
2818 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2819 if (files_happen)
2820 return NULL;
2821 fname = "-";
2822 rsm.F = stdin;
2823 break;
2824 }
2825 ind = getvar_s(incvar(intvar[ARGIND]));
2826 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2827 if (fname && *fname && !is_assignment(fname)) {
2828 rsm.F = xfopen_stdin(fname);
2829 break;
2830 }
2831 }
2832
2833 files_happen = TRUE;
2834 setvar_s(intvar[FILENAME], fname);
2835 return &rsm;
2836#undef rsm
2837#undef files_happen
2838}
2839
2840
2841
2842
2843
2844
2845
2846#define XC(n) ((n) >> 8)
2847
2848static var *evaluate(node *op, var *res)
2849{
2850
2851#define fnargs (G.evaluate__fnargs)
2852
2853#define seed (G.evaluate__seed)
2854#define sreg (G.evaluate__sreg)
2855
2856 var *tmpvars;
2857
2858 if (!op)
2859 return setvar_s(res, NULL);
2860
2861 debug_printf_eval("entered %s()\n", __func__);
2862
2863 tmpvars = nvalloc(2);
2864#define TMPVAR0 (tmpvars)
2865#define TMPVAR1 (tmpvars + 1)
2866
2867 while (op) {
2868 struct {
2869 var *v;
2870 const char *s;
2871 } L = L;
2872 struct {
2873 var *v;
2874 const char *s;
2875 } R = R;
2876 double L_d = L_d;
2877 uint32_t opinfo;
2878 int opn;
2879 node *op1;
2880
2881 opinfo = op->info;
2882 opn = (opinfo & OPNMASK);
2883 g_lineno = op->lineno;
2884 op1 = op->l.n;
2885 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2886
2887
2888 if (opinfo & OF_RES1) {
2889 if ((opinfo & OF_REQUIRED) && !op1)
2890 syntax_error(EMSG_TOO_FEW_ARGS);
2891 L.v = evaluate(op1, TMPVAR0);
2892 if (opinfo & OF_STR1) {
2893 L.s = getvar_s(L.v);
2894 debug_printf_eval("L.s:'%s'\n", L.s);
2895 }
2896 if (opinfo & OF_NUM1) {
2897 L_d = getvar_i(L.v);
2898 debug_printf_eval("L_d:%f\n", L_d);
2899 }
2900 }
2901
2902
2903
2904
2905
2906
2907
2908 if (opinfo & OF_RES2) {
2909 R.v = evaluate(op->r.n, TMPVAR1);
2910
2911
2912 if (opinfo & OF_STR2) {
2913 R.s = getvar_s(R.v);
2914 debug_printf_eval("R.s:'%s'\n", R.s);
2915 }
2916 }
2917
2918 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2919 switch (XC(opinfo & OPCLSMASK)) {
2920
2921
2922
2923
2924 case XC( OC_TEST ):
2925 debug_printf_eval("TEST\n");
2926 if (op1->info == TI_COMMA) {
2927
2928 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2929 op->info |= OF_CHECKED;
2930 if (ptest(op1->r.n))
2931 op->info &= ~OF_CHECKED;
2932 op = op->a.n;
2933 } else {
2934 op = op->r.n;
2935 }
2936 } else {
2937 op = ptest(op1) ? op->a.n : op->r.n;
2938 }
2939 break;
2940
2941
2942 case XC( OC_EXEC ):
2943 debug_printf_eval("EXEC\n");
2944 break;
2945
2946
2947 case XC( OC_BR ):
2948 debug_printf_eval("BR\n");
2949 op = istrue(L.v) ? op->a.n : op->r.n;
2950 break;
2951
2952
2953 case XC( OC_WALKINIT ):
2954 debug_printf_eval("WALKINIT\n");
2955 hashwalk_init(L.v, iamarray(R.v));
2956 break;
2957
2958
2959 case XC( OC_WALKNEXT ):
2960 debug_printf_eval("WALKNEXT\n");
2961 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2962 break;
2963
2964 case XC( OC_PRINT ):
2965 debug_printf_eval("PRINT /\n");
2966 case XC( OC_PRINTF ):
2967 debug_printf_eval("PRINTF\n");
2968 {
2969 FILE *F = stdout;
2970
2971 if (op->r.n) {
2972 rstream *rsm = newfile(R.s);
2973 if (!rsm->F) {
2974 if (opn == '|') {
2975 rsm->F = popen(R.s, "w");
2976 if (rsm->F == NULL)
2977 bb_simple_perror_msg_and_die("popen");
2978 rsm->is_pipe = 1;
2979 } else {
2980 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2981 }
2982 }
2983 F = rsm->F;
2984 }
2985
2986
2987
2988
2989 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2990 if (!op1) {
2991 fputs(getvar_s(intvar[F0]), F);
2992 } else {
2993 for (;;) {
2994 var *v = evaluate(nextarg(&op1), TMPVAR0);
2995 if (v->type & VF_NUMBER) {
2996 fmt_num(getvar_s(intvar[OFMT]),
2997 getvar_i(v));
2998 fputs(g_buf, F);
2999 } else {
3000 fputs(getvar_s(v), F);
3001 }
3002 if (!op1)
3003 break;
3004 fputs(getvar_s(intvar[OFS]), F);
3005 }
3006 }
3007 fputs(getvar_s(intvar[ORS]), F);
3008 } else {
3009 IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
3010 char *s = awk_printf(op1, &len);
3011#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3012 fwrite(s, len, 1, F);
3013#else
3014 fputs(s, F);
3015#endif
3016 free(s);
3017 }
3018 fflush(F);
3019 break;
3020 }
3021
3022 case XC( OC_DELETE ):
3023 debug_printf_eval("DELETE\n");
3024 {
3025
3026
3027
3028 uint32_t info = op1->info & OPCLSMASK;
3029 var *v;
3030
3031 if (info == OC_VAR) {
3032 v = op1->l.v;
3033 } else if (info == OC_FNARG) {
3034 v = &fnargs[op1->l.aidx];
3035 } else {
3036 syntax_error(EMSG_NOT_ARRAY);
3037 }
3038 if (op1->r.n) {
3039 const char *s;
3040 s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3041 hash_remove(iamarray(v), s);
3042 } else {
3043 clear_array(iamarray(v));
3044 }
3045 break;
3046 }
3047
3048 case XC( OC_NEWSOURCE ):
3049 debug_printf_eval("NEWSOURCE\n");
3050 g_progname = op->l.new_progname;
3051 break;
3052
3053 case XC( OC_RETURN ):
3054 debug_printf_eval("RETURN\n");
3055 copyvar(res, L.v);
3056 break;
3057
3058 case XC( OC_NEXTFILE ):
3059 debug_printf_eval("NEXTFILE\n");
3060 nextfile = TRUE;
3061 case XC( OC_NEXT ):
3062 debug_printf_eval("NEXT\n");
3063 nextrec = TRUE;
3064 case XC( OC_DONE ):
3065 debug_printf_eval("DONE\n");
3066 clrvar(res);
3067 break;
3068
3069 case XC( OC_EXIT ):
3070 debug_printf_eval("EXIT\n");
3071 if (op1)
3072 G.exitcode = (int)L_d;
3073 awk_exit();
3074
3075
3076
3077 case XC( OC_VAR ):
3078 debug_printf_eval("VAR\n");
3079 L.v = op->l.v;
3080 if (L.v == intvar[NF])
3081 split_f0();
3082 goto v_cont;
3083
3084 case XC( OC_FNARG ):
3085 debug_printf_eval("FNARG[%d]\n", op->l.aidx);
3086 L.v = &fnargs[op->l.aidx];
3087 v_cont:
3088 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
3089 break;
3090
3091 case XC( OC_IN ):
3092 debug_printf_eval("IN\n");
3093 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
3094 break;
3095
3096 case XC( OC_REGEXP ):
3097 debug_printf_eval("REGEXP\n");
3098 op1 = op;
3099 L.s = getvar_s(intvar[F0]);
3100 goto re_cont;
3101
3102 case XC( OC_MATCH ):
3103 debug_printf_eval("MATCH\n");
3104 op1 = op->r.n;
3105 re_cont:
3106 {
3107 regex_t *re = as_regex(op1, &sreg);
3108 int i = regexec(re, L.s, 0, NULL, 0);
3109 if (re == &sreg)
3110 regfree(re);
3111 setvar_i(res, (i == 0) ^ (opn == '!'));
3112 }
3113 break;
3114
3115 case XC( OC_MOVE ):
3116 debug_printf_eval("MOVE\n");
3117
3118 if (R.v == TMPVAR1
3119 && !(R.v->type & VF_NUMBER)
3120
3121
3122
3123 ) {
3124 res = setvar_p(L.v, R.v->string);
3125 R.v->string = NULL;
3126 } else {
3127 res = copyvar(L.v, R.v);
3128 }
3129 break;
3130
3131 case XC( OC_TERNARY ):
3132 debug_printf_eval("TERNARY\n");
3133 if (op->r.n->info != TI_COLON)
3134 syntax_error(EMSG_POSSIBLE_ERROR);
3135 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
3136 break;
3137
3138 case XC( OC_FUNC ): {
3139 var *argvars, *sv_fnargs;
3140 const char *sv_progname;
3141 int nargs, i;
3142
3143 debug_printf_eval("FUNC\n");
3144
3145 if (!op->r.f->defined)
3146 syntax_error(EMSG_UNDEF_FUNC);
3147
3148
3149 nargs = op->r.f->nargs;
3150 argvars = nvalloc(nargs);
3151 i = 0;
3152 while (op1) {
3153 var *arg = evaluate(nextarg(&op1), TMPVAR0);
3154 if (i == nargs) {
3155
3156
3157
3158 clrvar(arg);
3159 continue;
3160 }
3161 copyvar(&argvars[i], arg);
3162 argvars[i].type |= VF_CHILD;
3163 argvars[i].x.parent = arg;
3164 i++;
3165 }
3166
3167 sv_fnargs = fnargs;
3168 sv_progname = g_progname;
3169
3170 fnargs = argvars;
3171 res = evaluate(op->r.f->body.first, res);
3172 nvfree(argvars, nargs);
3173
3174 g_progname = sv_progname;
3175 fnargs = sv_fnargs;
3176
3177 break;
3178 }
3179
3180 case XC( OC_GETLINE ):
3181 debug_printf_eval("GETLINE /\n");
3182 case XC( OC_PGETLINE ):
3183 debug_printf_eval("PGETLINE\n");
3184 {
3185 rstream *rsm;
3186 int i;
3187
3188 if (op1) {
3189 rsm = newfile(L.s);
3190 if (!rsm->F) {
3191
3192 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
3193 rsm->F = popen(L.s, "r");
3194 rsm->is_pipe = TRUE;
3195 } else {
3196 rsm->F = fopen_for_read(L.s);
3197 }
3198 }
3199 } else {
3200 if (!iF)
3201 iF = next_input_file();
3202 rsm = iF;
3203 }
3204
3205 if (!rsm || !rsm->F) {
3206 setvar_i(intvar[ERRNO], errno);
3207 setvar_i(res, -1);
3208 break;
3209 }
3210
3211 if (!op->r.n)
3212 R.v = intvar[F0];
3213
3214 i = awk_getline(rsm, R.v);
3215 if (i > 0 && !op1) {
3216 incvar(intvar[FNR]);
3217 incvar(intvar[NR]);
3218 }
3219 setvar_i(res, i);
3220 break;
3221 }
3222
3223
3224 case XC( OC_FBLTIN ): {
3225 double R_d = R_d;
3226 debug_printf_eval("FBLTIN\n");
3227
3228 if (op1 && op1->info == TI_COMMA)
3229
3230 syntax_error("Too many arguments");
3231
3232 switch (opn) {
3233 case F_in:
3234 R_d = (long long)L_d;
3235 break;
3236
3237 case F_rn:
3238 if (op1)
3239 syntax_error("Too many arguments");
3240 {
3241#if RAND_MAX >= 0x7fffffff
3242 uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3243 uint64_t v = ((uint64_t)rand() << 32) | u;
3244
3245# if RAND_MAX > 0x7fffffff
3246 v &= 0x7fffffffffffffffULL;
3247# endif
3248 R_d = (double)v / 0x8000000000000000ULL;
3249#else
3250# error Not implemented for this value of RAND_MAX
3251#endif
3252 break;
3253 }
3254 case F_co:
3255 if (ENABLE_FEATURE_AWK_LIBM) {
3256 R_d = cos(L_d);
3257 break;
3258 }
3259
3260 case F_ex:
3261 if (ENABLE_FEATURE_AWK_LIBM) {
3262 R_d = exp(L_d);
3263 break;
3264 }
3265
3266 case F_lg:
3267 if (ENABLE_FEATURE_AWK_LIBM) {
3268 R_d = log(L_d);
3269 break;
3270 }
3271
3272 case F_si:
3273 if (ENABLE_FEATURE_AWK_LIBM) {
3274 R_d = sin(L_d);
3275 break;
3276 }
3277
3278 case F_sq:
3279 if (ENABLE_FEATURE_AWK_LIBM) {
3280 R_d = sqrt(L_d);
3281 break;
3282 }
3283
3284 syntax_error(EMSG_NO_MATH);
3285 break;
3286
3287 case F_sr:
3288 R_d = (double)seed;
3289 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
3290 srand(seed);
3291 break;
3292
3293 case F_ti:
3294 if (op1)
3295 syntax_error("Too many arguments");
3296 R_d = time(NULL);
3297 break;
3298
3299 case F_le:
3300 debug_printf_eval("length: L.s:'%s'\n", L.s);
3301 if (!op1) {
3302 L.s = getvar_s(intvar[F0]);
3303 debug_printf_eval("length: L.s='%s'\n", L.s);
3304 }
3305 else if (L.v->type & VF_ARRAY) {
3306 R_d = L.v->x.array->nel;
3307 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
3308 break;
3309 }
3310 R_d = strlen(L.s);
3311 break;
3312
3313 case F_sy:
3314 fflush_all();
3315 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
3316 ? (system(L.s) >> 8) : 0;
3317 break;
3318
3319 case F_ff:
3320 if (!op1) {
3321 fflush(stdout);
3322 } else if (L.s && *L.s) {
3323 rstream *rsm = newfile(L.s);
3324 fflush(rsm->F);
3325 } else {
3326 fflush_all();
3327 }
3328 break;
3329
3330 case F_cl: {
3331 rstream *rsm;
3332 int err = 0;
3333 rsm = (rstream *)hash_search(fdhash, L.s);
3334 debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
3335 if (rsm) {
3336 debug_printf_eval("OC_FBLTIN F_cl "
3337 "rsm->is_pipe:%d, ->F:%p\n",
3338 rsm->is_pipe, rsm->F);
3339
3340
3341
3342
3343 if (rsm->F)
3344 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
3345
3346
3347
3348
3349
3350 free(rsm->buffer);
3351 hash_remove(fdhash, L.s);
3352 }
3353 if (err)
3354 setvar_i(intvar[ERRNO], errno);
3355 R_d = (double)err;
3356 break;
3357 }
3358 }
3359 setvar_i(res, R_d);
3360 break;
3361 }
3362
3363 case XC( OC_BUILTIN ):
3364 debug_printf_eval("BUILTIN\n");
3365 res = exec_builtin(op, res);
3366 break;
3367
3368 case XC( OC_SPRINTF ):
3369 debug_printf_eval("SPRINTF\n");
3370 setvar_p(res, awk_printf(op1, NULL));
3371 break;
3372
3373 case XC( OC_UNARY ):
3374 debug_printf_eval("UNARY\n");
3375 {
3376 double Ld, R_d;
3377
3378 Ld = R_d = getvar_i(R.v);
3379 switch (opn) {
3380 case 'P':
3381 Ld = ++R_d;
3382 goto r_op_change;
3383 case 'p':
3384 R_d++;
3385 goto r_op_change;
3386 case 'M':
3387 Ld = --R_d;
3388 goto r_op_change;
3389 case 'm':
3390 R_d--;
3391 r_op_change:
3392 setvar_i(R.v, R_d);
3393 break;
3394 case '!':
3395 Ld = !istrue(R.v);
3396 break;
3397 case '-':
3398 Ld = -R_d;
3399 break;
3400 }
3401 setvar_i(res, Ld);
3402 break;
3403 }
3404
3405 case XC( OC_FIELD ):
3406 debug_printf_eval("FIELD\n");
3407 {
3408 int i = (int)getvar_i(R.v);
3409 if (i < 0)
3410 syntax_error(EMSG_NEGATIVE_FIELD);
3411 if (i == 0) {
3412 res = intvar[F0];
3413 } else {
3414 split_f0();
3415 if (i > nfields)
3416 fsrealloc(i);
3417 res = &Fields[i - 1];
3418 }
3419 break;
3420 }
3421
3422
3423 case XC( OC_CONCAT ):
3424 debug_printf_eval("CONCAT /\n");
3425 case XC( OC_COMMA ): {
3426 const char *sep = "";
3427 debug_printf_eval("COMMA\n");
3428 if (opinfo == TI_COMMA)
3429 sep = getvar_s(intvar[SUBSEP]);
3430 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
3431 break;
3432 }
3433
3434 case XC( OC_LAND ):
3435 debug_printf_eval("LAND\n");
3436 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3437 break;
3438
3439 case XC( OC_LOR ):
3440 debug_printf_eval("LOR\n");
3441 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3442 break;
3443
3444 case XC( OC_BINARY ):
3445 debug_printf_eval("BINARY /\n");
3446 case XC( OC_REPLACE ):
3447 debug_printf_eval("REPLACE\n");
3448 {
3449 double R_d = getvar_i(R.v);
3450 debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
3451 switch (opn) {
3452 case '+':
3453 L_d += R_d;
3454 break;
3455 case '-':
3456 L_d -= R_d;
3457 break;
3458 case '*':
3459 L_d *= R_d;
3460 break;
3461 case '/':
3462 if (R_d == 0)
3463 syntax_error(EMSG_DIV_BY_ZERO);
3464 L_d /= R_d;
3465 break;
3466 case '&':
3467 if (ENABLE_FEATURE_AWK_LIBM)
3468 L_d = pow(L_d, R_d);
3469 else
3470 syntax_error(EMSG_NO_MATH);
3471 break;
3472 case '%':
3473 if (R_d == 0)
3474 syntax_error(EMSG_DIV_BY_ZERO);
3475 L_d -= (long long)(L_d / R_d) * R_d;
3476 break;
3477 }
3478 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3479 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3480 break;
3481 }
3482
3483 case XC( OC_COMPARE ): {
3484 int i = i;
3485 double Ld;
3486 debug_printf_eval("COMPARE\n");
3487
3488 if (is_numeric(L.v) && is_numeric(R.v)) {
3489 Ld = getvar_i(L.v) - getvar_i(R.v);
3490 } else {
3491 const char *l = getvar_s(L.v);
3492 const char *r = getvar_s(R.v);
3493 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3494 }
3495 switch (opn & 0xfe) {
3496 case 0:
3497 i = (Ld > 0);
3498 break;
3499 case 2:
3500 i = (Ld >= 0);
3501 break;
3502 case 4:
3503 i = (Ld == 0);
3504 break;
3505 }
3506 setvar_i(res, (i == 0) ^ (opn & 1));
3507 break;
3508 }
3509
3510 default:
3511 syntax_error(EMSG_POSSIBLE_ERROR);
3512 }
3513
3514 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3515 op = op->a.n;
3516 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3517 break;
3518 if (nextrec)
3519 break;
3520 }
3521
3522 nvfree(tmpvars, 2);
3523#undef TMPVAR0
3524#undef TMPVAR1
3525
3526 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3527 return res;
3528#undef fnargs
3529#undef seed
3530#undef sreg
3531}
3532
3533
3534
3535static int awk_exit(void)
3536{
3537 unsigned i;
3538
3539 if (!exiting) {
3540 exiting = TRUE;
3541 nextrec = FALSE;
3542 evaluate(endseq.first, &G.exit__tmpvar);
3543 }
3544
3545
3546 for (i = 0; i < fdhash->csize; i++) {
3547 hash_item *hi;
3548 hi = fdhash->items[i];
3549 while (hi) {
3550 if (hi->data.rs.F && hi->data.rs.is_pipe)
3551 pclose(hi->data.rs.F);
3552 hi = hi->next;
3553 }
3554 }
3555
3556 exit(G.exitcode);
3557}
3558
3559int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3560int awk_main(int argc UNUSED_PARAM, char **argv)
3561{
3562 unsigned opt;
3563 char *opt_F;
3564 llist_t *list_v = NULL;
3565 llist_t *list_f = NULL;
3566#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3567 llist_t *list_e = NULL;
3568#endif
3569 int i;
3570
3571 INIT_G();
3572
3573
3574
3575 if (ENABLE_LOCALE_SUPPORT)
3576 setlocale(LC_NUMERIC, "C");
3577
3578
3579 vhash = hash_init();
3580 {
3581 char *vnames = (char *)vNames;
3582 char *vvalues = (char *)vValues;
3583 for (i = 0; *vnames; i++) {
3584 var *v;
3585 intvar[i] = v = newvar(nextword(&vnames));
3586 if (*vvalues != '\377')
3587 setvar_s(v, nextword(&vvalues));
3588 else
3589 setvar_i(v, 0);
3590
3591 if (*vnames == '*') {
3592 v->type |= VF_SPECIAL;
3593 vnames++;
3594 }
3595 }
3596 }
3597
3598 handle_special(intvar[FS]);
3599 handle_special(intvar[RS]);
3600
3601
3602 if (environ) {
3603 char **envp;
3604 for (envp = environ; *envp; envp++) {
3605
3606 char *s = *envp;
3607 char *s1 = strchr(s, '=');
3608 if (s1) {
3609 *s1 = '\0';
3610
3611
3612 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3613 *s1 = '=';
3614 }
3615 }
3616 }
3617 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3618 argv += optind;
3619
3620 if (opt & OPT_W)
3621 bb_simple_error_msg("warning: option -W is ignored");
3622 if (opt & OPT_F) {
3623 unescape_string_in_place(opt_F);
3624 setvar_s(intvar[FS], opt_F);
3625 }
3626 while (list_v) {
3627 if (!is_assignment(llist_pop(&list_v)))
3628 bb_show_usage();
3629 }
3630
3631
3632 fnhash = hash_init();
3633 ahash = hash_init();
3634 while (list_f) {
3635 int fd;
3636 char *s;
3637
3638 g_progname = llist_pop(&list_f);
3639 fd = xopen_stdin(g_progname);
3640 s = xmalloc_read(fd, NULL);
3641 close(fd);
3642 parse_program(s);
3643 free(s);
3644 }
3645 g_progname = "cmd. line";
3646#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3647 while (list_e) {
3648 parse_program(llist_pop(&list_e));
3649 }
3650#endif
3651
3652
3653 if (!(opt & (OPT_f | OPT_e))) {
3654 if (!*argv)
3655 bb_show_usage();
3656 parse_program(*argv++);
3657 }
3658
3659
3660
3661
3662 free(fnhash->items);
3663 free(fnhash);
3664 fnhash = NULL;
3665
3666
3667
3668
3669
3670 setari_u(intvar[ARGV], 0, "awk");
3671 i = 0;
3672 while (*argv)
3673 setari_u(intvar[ARGV], ++i, *argv++);
3674 setvar_i(intvar[ARGC], i + 1);
3675
3676
3677 newfile("/dev/stdin")->F = stdin;
3678 newfile("/dev/stdout")->F = stdout;
3679 newfile("/dev/stderr")->F = stderr;
3680
3681 evaluate(beginseq.first, &G.main__tmpvar);
3682 if (!mainseq.first && !endseq.first)
3683 awk_exit();
3684
3685
3686 if (!iF)
3687 iF = next_input_file();
3688
3689
3690 while (iF) {
3691 nextfile = FALSE;
3692 setvar_i(intvar[FNR], 0);
3693
3694 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3695 nextrec = FALSE;
3696 incvar(intvar[NR]);
3697 incvar(intvar[FNR]);
3698 evaluate(mainseq.first, &G.main__tmpvar);
3699
3700 if (nextfile)
3701 break;
3702 }
3703
3704 if (i < 0)
3705 syntax_error(strerror(errno));
3706
3707 iF = next_input_file();
3708 }
3709
3710 awk_exit();
3711
3712}
3713