1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48#include "libbb.h"
49#include "xregex.h"
50#include <math.h>
51
52
53
54
55
56
/* Debug tracing switches. As written, all three debug_printf_* macros
 * expand to an empty statement. Removing one of these three definitions
 * makes the matching #ifndef below define it as an fprintf() to stderr. */
#define debug_printf_walker(...) do {} while (0)
#define debug_printf_eval(...) do {} while (0)
#define debug_printf_parse(...) do {} while (0)

#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#else
/* Parse tracing is disabled: the token-class dumper becomes a no-op
 * macro, which also suppresses the function of the same name. */
# define debug_parse_print_tc(...) ((void)0)
#endif
72
73
74
75
76
77
/* Option-letter string and the matching bit numbers / bit masks.
 * The consumer of OPTSTR_AWK is outside this chunk.
 * NOTE(review): IF_FEATURE_AWK_GNU_EXTENSIONS() presumably expands to its
 * argument or to nothing depending on build configuration - confirm. */
#define OPTSTR_AWK "+" \
 "F:v:*f:*" \
 IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
 "W:"
enum {
 /* bit numbers, listed in the same order as the letters above */
 OPTBIT_F,
 OPTBIT_v,
 OPTBIT_f,
 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,)
 OPTBIT_W,
 /* masks derived from the bit numbers */
 OPT_F = 1 << OPTBIT_F,
 OPT_v = 1 << OPTBIT_v,
 OPT_f = 1 << OPTBIT_f,
 /* the trailing "+ 0" keeps this a valid expression if the macro yields nothing */
 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
 OPT_W = 1 << OPTBIT_W
};
94
/* Size limit passed to snprintf() by fmt_num(); g_buf is declared with
 * MAXVARFMT + 1 bytes. */
#define MAXVARFMT 240

/* Flag bits stored in var::type */
#define VF_NUMBER 0x0001 /* set by setvar_i(): the numeric value is the primary one */
#define VF_ARRAY 0x0002 /* set by iamarray(): x.array points to a hash */

#define VF_CACHED 0x0100 /* set by getvar_s()/getvar_i() once the other representation is computed */
#define VF_USER 0x0200 /* set by setvar_u(); cleared by getvar_i() if the string is not entirely a number */
#define VF_SPECIAL 0x0400 /* not interpreted in this chunk; presumably examined by handle_special() */
#define VF_WALK 0x0800 /* not interpreted in this chunk; presumably relates to x.walker */
#define VF_FSTR 0x1000 /* clrvar() does not free() var::string when this is set */
#define VF_CHILD 0x2000 /* iamarray() follows x.parent while this is set */
#define VF_DIRTY 0x4000 /* set by clrvar(), i.e. on every (re)assignment */

/* Bits that clrvar() keeps when a variable is assigned a new value */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
111
/* Linked record referenced from var::x.walker. Its users are outside this
 * chunk. NOTE(review): wbuf[1] looks like the head of a variable-length
 * tail with cur/end pointing into it - confirm against the allocator. */
typedef struct walker_list {
 char *end;
 char *cur;
 struct walker_list *prev; /* link to another walker_list record */
 char wbuf[1];
} walker_list;
118
119
/* An awk value: a flag word plus string and numeric representations. */
typedef struct var_s {
 unsigned type; /* VF_* flag bits */
 char *string; /* string value; NULL is read as "" by getvar_s() */
 double number; /* numeric value */
 union {
 int aidx; /* copied into node::l.aidx by parse_expr() for OC_FNARG nodes */
 struct xhash_s *array; /* element hash, valid when VF_ARRAY is set (see iamarray()) */
 struct var_s *parent; /* followed by iamarray() while VF_CHILD is set */
 walker_list *walker; /* not used in this chunk */
 } x;
} var;
131
132
/* A sequence of nodes identified by its first and last element, plus a
 * program name. Construction and traversal are outside this chunk. */
typedef struct chain_s {
 struct node_s *first;
 struct node_s *last;
 const char *programname;
} chain;
138
139
/* Function record stored in a hash (see newfunc()): argument count,
 * a "defined" flag and the body as a node chain. */
typedef struct func_s {
 unsigned nargs;
 smallint defined;
 struct chain_s body;
} func;
145
146
/* Stream record stored in a hash (see newfile()). The code that fills in
 * buffer/adv/size/pos is outside this chunk, so their exact meaning is
 * not documented here. */
typedef struct rstream_s {
 FILE *F;
 char *buffer;
 int adv;
 int size;
 int pos;
 smallint is_pipe;
} rstream;
155
/* One hash table entry. The payload union must stay the first member:
 * hash_search3() returns &item->data and hash_find() stores that pointer
 * back into a hash_item pointer. */
typedef struct hash_item_s {
 union {
 struct var_s v; /* variables and array elements */
 struct rstream_s rs; /* streams */
 struct func_s f; /* functions */
 } data;
 struct hash_item_s *next; /* next entry in the same bucket */
 char name[1]; /* key; hash_find() allocates strlen(name) + 1 extra bytes for it */
} hash_item;
165
/* Chained hash table keyed by NUL-terminated names. */
typedef struct xhash_s {
 unsigned nel; /* number of entries (hash_find() increments, hash_remove() decrements) */
 unsigned csize; /* current number of buckets */
 unsigned nprime; /* index of the next size to take from PRIMES[] */
 unsigned glen; /* sum of strlen(name) + 1 over all entries */
 struct hash_item_s **items; /* bucket array of csize list heads */
} xhash;
173
174
/* Parse tree node. */
typedef struct node_s {
 uint32_t info; /* opcode class, operand flags and priority (see tokeninfo[]) */
 unsigned lineno; /* g_lineno at the time new_node() created the node */
 union {
 struct node_s *n;
 var *v; /* variable or constant, set by parse_expr() for OC_VAR */
 int aidx; /* set by parse_expr() for OC_FNARG */
 const char *new_progname;
 regex_t *re; /* first regex of the pair set by mk_re_node() */
 } l;
 union {
 struct node_s *n;
 regex_t *ire; /* second (REG_ICASE) regex set by mk_re_node() */
 func *f;
 } r;
 union {
 struct node_s *n; /* parse_expr() stores the enclosing node here */
 } a;
} node;
194
/* A node bundled with storage for two regex_t objects.
 * NOTE(review): presumably filled via mk_re_node(s, &n, re), which uses
 * re[0] and re[1] - the call sites are outside this chunk. */
typedef struct tsplitter_s {
 node n;
 regex_t re[2];
} tsplitter;
199
200
201
/* Token class bits.
 * The classes TC_LPAREN..TC_END must stay in the same order as the
 * NTC-separated groups of tokenlist[]: next_token() starts with class
 * bit 1 and shifts it left each time it meets an NTC separator. */
#define TC_LPAREN (1 << 0)
#define TC_RPAREN (1 << 1)
#define TC_REGEXP (1 << 2)
#define TC_OUTRDR (1 << 3)
#define TC_UOPPOST (1 << 4)
#define TC_UOPPRE1 (1 << 5)
#define TC_BINOPX (1 << 6)
#define TC_IN (1 << 7)
#define TC_COMMA (1 << 8)
#define TC_PIPE (1 << 9)
#define TC_UOPPRE2 (1 << 10)
#define TC_ARRTERM (1 << 11)
#define TC_LBRACE (1 << 12)
#define TC_RBRACE (1 << 13)
#define TC_SEMICOL (1 << 14)
#define TC_NEWLINE (1 << 15)
#define TC_STATX (1 << 16)
#define TC_WHILE (1 << 17)
#define TC_ELSE (1 << 18)
#define TC_BUILTIN (1 << 19)





#define TC_LENGTH (1 << 20)
#define TC_GETLINE (1 << 21)
#define TC_FUNCDECL (1 << 22)
#define TC_BEGIN (1 << 23)
#define TC_END (1 << 24)
/* The classes below have no tokenlist[] group; next_token() assigns
 * them directly after recognizing the lexeme itself. */
#define TC_EOF (1 << 25)
#define TC_VARIABLE (1 << 26)
#define TC_ARRAY (1 << 27)
#define TC_FUNCTION (1 << 28)
#define TC_STRING (1 << 29)
#define TC_NUMBER (1 << 30)
238
239#ifndef debug_parse_print_tc
240static void debug_parse_print_tc(uint32_t n)
241{
242 if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
243 if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
244 if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
245 if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
246 if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
247 if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
248 if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
249 if (n & TC_IN ) debug_printf_parse(" IN" );
250 if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
251 if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
252 if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
253 if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
254 if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
255 if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
256 if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
257 if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
258 if (n & TC_STATX ) debug_printf_parse(" STATX" );
259 if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
260 if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
261 if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
262 if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
263 if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
264 if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
265 if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
266 if (n & TC_END ) debug_printf_parse(" END" );
267 if (n & TC_EOF ) debug_printf_parse(" EOF" );
268 if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
269 if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
270 if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
271 if (n & TC_STRING ) debug_printf_parse(" STRING" );
272 if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
273}
274#endif
275
276
/* Token class sets (unions of TC_* bits). */

/* any prefix unary operator */
#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)

/* any binary operator */
#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)

/* tokens that start an operand */
#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 | TC_LPAREN | TC_STRING | TC_NUMBER)

#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
#define TS_STATEMNT (TC_STATX | TC_WHILE)

/* Keyword classes: next_token() always tries these, and refuses a match
 * that is immediately followed by an alphanumeric or '_' character. */
#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
 | TC_FUNCDECL | TC_BEGIN | TC_END)

/* When the previous token is in this set, next_token() skips a
 * following newline (TC_COMMA is already part of TS_BINOP). */
#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
 | TC_SEMICOL | TC_NEWLINE)

/* tokens that may begin an expression */
#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)

#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
 | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)

/* next_token() inserts an implicit concatenation operator when the
 * previous token is in TS_CONCAT_L, the current one is in TS_CONCAT_R
 * and a binary operator is among the expected classes. */
#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
 | TC_STRING | TC_NUMBER | TC_UOPPOST \
 | TC_LENGTH)
#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
309
/* Operand flag bits carried in the info word (bits 16..22). They are
 * interpreted by code outside this chunk, so only their composition is
 * documented here. */
#define OF_RES1 0x010000
#define OF_RES2 0x020000
#define OF_STR1 0x040000
#define OF_STR2 0x080000
#define OF_NUM1 0x100000
#define OF_CHECKED 0x200000
#define OF_REQUIRED 0x400000

/* Two-letter shorthands used in tokeninfo[]: the first letter selects the
 * *1 flags, the second the *2 flags; 'x' means no flag for that operand. */
#define xx 0
#define xV OF_RES2
#define xS (OF_RES2 | OF_STR2)
#define Vx OF_RES1
#define Rx OF_REQUIRED
#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx (OF_RES1 | OF_STR1)
#define SV (OF_RES1 | OF_STR1 | OF_RES2)
#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* OPCLSMASK selects bits 8..15, where the OC_ and ST_ opcode values live;
 * OPNMASK selects the low 7 bits. */
#define OPCLSMASK 0xFF00
#define OPNMASK 0x007F

/* P(x) places x in bits 24 and up of the info word; parse_expr() compares
 * (info & PRIMASK) of an operator against (info & PRIMASK2) of a node.
 * PRIMASK2 is PRIMASK without its lowest bit.
 * NOTE(review): the argument of P() is not parenthesized, and P(0x80) /
 * P(0xc0) shift into the sign bit of int. */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x) (x << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000

/* Same values as OC_WALKINIT and OC_BINARY respectively; their users are
 * outside this chunk. */
#define SHIFT_TIL_THIS 0x0600
#define RECUR_FROM_THIS 0x1000
/* Opcode classes. Every value is a multiple of 0x100, i.e. it occupies
 * the bits selected by OPCLSMASK. */
enum {
 /* 0x0100..0x0600 (the last one equals SHIFT_TIL_THIS) */
 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,

 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,

 /* 0x1000 and up (the first one equals RECUR_FROM_THIS) */
 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
 OC_DONE = 0x2800,

 /* codes attached to the if/do/for/while keywords in tokeninfo[] */
 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
 ST_WHILE = 0x3300
};
368
369
/* Sub-codes OR-ed with OC_FBLTIN in tokeninfo[] (close, system, fflush,
 * cos, exp, int, log, rand, sin, sqrt, srand, systime, length). */
enum {
 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
 F_ti, F_le, F_sy, F_ff, F_cl
};

/* Sub-codes OR-ed with OC_BUILTIN in tokeninfo[] (atan2, index, match,
 * split, substr, strftime, mktime, tolower, toupper, gensub, gsub, sub,
 * and, compl, lshift, or, rshift, xor). */
enum {
 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
 B_ge, B_gs, B_su,
 B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
381
382
383
/* Separator between token classes inside tokenlist[] */
#define NTC "\377"
#define NTCC '\377'

/* Known lexemes. Each one is stored as a length byte followed by its
 * text. Groups are separated by NTC and appear in the order of the TC_*
 * bits; next_token() scans this list in order and takes the first entry
 * that matches, so a longer operator must precede its own prefix inside
 * a group. tokeninfo[] holds one entry per lexeme, in the same order. */
static const char tokenlist[] ALIGN1 =
 "\1(" NTC /* TC_LPAREN */
 "\1)" NTC /* TC_RPAREN */
 "\1/" NTC /* TC_REGEXP */
 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
 "\2++" "\2--" NTC /* TC_UOPPOST */
 "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
 "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
 "\2*=" "\2/=" "\2%=" "\2^="
 "\1+" "\1-" "\3**=" "\2**"
 "\1/" "\1%" "\1^" "\1*"
 "\2!=" "\2>=" "\2<=" "\1>"
 "\1<" "\2!~" "\1~" "\2&&"
 "\2||" "\1?" "\1:" NTC
 "\2in" NTC /* TC_IN */
 "\1," NTC /* TC_COMMA */
 "\1|" NTC /* TC_PIPE */
 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
 "\1]" NTC /* TC_ARRTERM */
 "\1{" NTC /* TC_LBRACE */
 "\1}" NTC /* TC_RBRACE */
 "\1;" NTC /* TC_SEMICOL */
 "\1\n" NTC /* TC_NEWLINE */
 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
 "\10continue" "\6delete" "\5print"
 "\6printf" "\4next" "\10nextfile"
 "\6return" "\4exit" NTC
 "\5while" NTC /* TC_WHILE */
 "\4else" NTC /* TC_ELSE */
 "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
 "\6rshift" "\3xor"
 "\5close" "\6system" "\6fflush" "\5atan2"
 "\3cos" "\3exp" "\3int" "\3log"
 "\4rand" "\3sin" "\4sqrt" "\5srand"
 "\6gensub" "\4gsub" "\5index"
 "\5match" "\5split" "\7sprintf" "\3sub"
 "\6substr" "\7systime" "\10strftime" "\6mktime"
 "\7tolower" "\7toupper" NTC
 "\6length" NTC /* TC_LENGTH */
 "\7getline" NTC /* TC_GETLINE */
 "\4func" "\10function" NTC /* TC_FUNCDECL */
 "\5BEGIN" NTC /* TC_BEGIN */
 "\3END" /* TC_END */
 /* the string literal's implicit NUL ends the scan loop in next_token() */
 ;
432
/* One info word per lexeme of tokenlist[], in exactly the same order:
 * next_token() advances through both tables in lock step. An info word
 * combines an OC_ or ST_ opcode, operand flags (xx, xV, SS, ...), a
 * priority P(n) and, in the low bits, a sub-code or character. */
static const uint32_t tokeninfo[] ALIGN4 = {
 0, /* ( */
 0, /* ) */
#define TI_REGEXP OC_REGEXP
 TI_REGEXP, /* / */
 xS|'a', xS|'w', xS|'|', /* >> > | (TC_OUTRDR) */
 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', /* postfix ++ -- */
#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
 TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), /* prefix ++ -- $ */
 /* == = += -= */
 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
 /* *= /= %= ^= */
 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
 /* + - **= ** */
 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
 /* / % ^ * */
 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
 /* != >= <= > */
 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
#define TI_LESS (OC_COMPARE|VV|P(39)|2)
 /* < !~ ~ && */
 TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
#define TI_COLON (OC_COLON|xx|P(67)|':')
 /* || ? : */
 OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
#define TI_IN (OC_IN|SV|P(49))
 TI_IN, /* in */
#define TI_COMMA (OC_COMMA|SS|P(80))
 TI_COMMA, /* , */
#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
 TI_PGETLINE, /* | (TC_PIPE) */
 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', /* unary + - ! */
 0, /* ] */
 0, /* { */
 0, /* } */
 0, /* ; */
 0, /* newline */
 ST_IF, ST_DO, ST_FOR, OC_BREAK, /* if do for break */
 OC_CONTINUE, OC_DELETE|Rx, OC_PRINT, /* continue delete print */
 OC_PRINTF, OC_NEXT, OC_NEXTFILE, /* printf next nextfile */
 OC_RETURN|Vx, OC_EXIT|Nx, /* return exit */
 ST_WHILE, /* while */
 0, /* else */

/* Shorthands local to the built-in function rows below. The A1/A2/A3 and
 * __v ... ss_vv_ values sit in the P() bit field of the info word; they
 * are decoded outside this chunk. */





#define OC_B OC_BUILTIN
#define OC_F OC_FBLTIN
#define A1 P(0x40)
#define A2 P(0x80)
#define A3 P(0xc0)
#define __v P(1)
#define _vv P(3)
#define __s__v P(9)
#define __s_vv P(0x0b)
#define __svvv P(0x0f)
#define _ss_vv P(0x1b)
#define _s_vv_ P(0x16)
#define ss_vv_ P(0x36)
 /* and compl lshift or */
 OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2,
 /* rshift xor */
 OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2,
 /* close system fflush atan2 */
 OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2,
 /* cos exp int log */
 OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx,
 /* rand sin sqrt srand */
 OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx,
 /* gensub gsub index */
 OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2,
 /* match split sprintf sub */
 OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,
 /* substr systime strftime mktime */
 OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv,
 /* tolower toupper */
 OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1,
 OC_F|F_le|Sx, /* length */
 OC_GETLINE|SV, /* getline */
 0, 0, /* func function */
 0, /* BEGIN */
 0 /* END */
#undef A1
#undef A2
#undef A3
#undef OC_B
#undef OC_F
};
511
512
513
/* Indices into intvar[] (declared with NUM_INTERNAL_VARS elements). */
enum {
 CONVFMT, OFMT, FS, OFS,
 ORS, RS, RT, FILENAME,
 SUBSEP, F0, ARGIND, ARGC,
 ARGV, ERRNO, FNR, NR,
 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
};

/* NUL-separated names in the order of the enum above. The name for F0
 * is "$". Some names are followed by a '*' marker.
 * NOTE(review): the code that walks this list is outside this chunk;
 * presumably '*' flags the preceding variable as VF_SPECIAL - confirm. */
static const char vNames[] ALIGN1 =
 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";

/* NUL-separated string values for the first ten names above, ended by a
 * '\377' byte.
 * NOTE(review): presumably the initial values, with '\377' meaning that
 * the remaining variables get no string value - confirm in the reader. */
static const char vValues[] ALIGN1 =
 "%.6g\0" "%.6g\0" " \0" " \0"
 "\n\0" "\n\0" "\0" "\0"
 "\034\0" "\0" "\377";
533
534
/* Bucket counts for xhash: hash_init() starts with FIRST_PRIME buckets,
 * hash_rebuild() steps through PRIMES[] using xhash::nprime as the index
 * and stops growing once the list is exhausted. */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
537
538
539
540
541
542
543
/* Global state is split into two structs allocated back to back by
 * INIT_G(): ptr_to_globals points at struct globals2, and struct globals
 * sits immediately before it (accessed as ptr_to_globals[-1]). */
struct globals {
 double t_double; /* value of the last TC_NUMBER token (set by next_token()) */
 chain beginseq, mainseq, endseq;
 chain *seq;
 node *break_ptr, *continue_ptr;
 rstream *iF;
 xhash *ahash; /* looked up by parse_expr() to detect function arguments; also aliased as fdhash */
 xhash *fnhash; /* functions (see newfunc()) */
 xhash *vhash; /* variables (see newvar()) */


 const char *g_progname; /* printed by syntax_error() */
 int g_lineno; /* printed by syntax_error(); copied from t_lineno by next_token() */
 int nfields;
 int maxfields;
 var *Fields;
 char *g_pos; /* current read position of next_token() in the program text */
 char g_saved_ch; /* char that next_token() overwrote with NUL after a name; restored on the next call */
 smallint icase;
 smallint exiting;
 smallint nextrec;
 smallint nextfile;
 smallint is_f0_split;
 smallint t_rollback; /* set by rollback_token(): next_token() returns the current token again */

 /* state private to next_token(): the token held back while an implicit
 * concatenation operator is returned in its place */
 smallint next_token__concat_inserted;
 uint32_t next_token__save_tclass;
 uint32_t next_token__save_info;
};
struct globals2 {
 /* current token, as produced by next_token() */
 uint32_t t_info;
 uint32_t t_tclass;
 char *t_string;
 int t_lineno; /* line counter advanced by the tokenizer */

 var *intvar[NUM_INTERNAL_VARS]; /* indexed by CONVFMT, OFMT, ... */

 /* The members below use a "function__name" naming pattern; the named
 * functions are outside this chunk. */
 char *split_f0__fstrings;

 rstream next_input_file__rsm;
 smallint next_input_file__files_happen;

 smalluint exitcode;

 unsigned evaluate__seed; /* initialized to 1 by INIT_G() */
 var *evaluate__fnargs;
 regex_t evaluate__sreg;

 var ptest__tmpvar;
 var awk_printf__tmpvar;
 var as_regex__tmpvar;
 var exit__tmpvar;
 var main__tmpvar;

 tsplitter exec_builtin__tspl;


 tsplitter fsplitter, rsplitter;

 char g_buf[MAXVARFMT + 1]; /* output buffer of fmt_num() */
};
/* G is the struct globals2 that ptr_to_globals points at; G1 is the
 * object just before it, where INIT_G() places struct globals. */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)

/* Shorthands that let the rest of the file use the global fields as if
 * they were plain variables. */



#define t_double (G1.t_double )
#define beginseq (G1.beginseq )
#define mainseq (G1.mainseq )
#define endseq (G1.endseq )
#define seq (G1.seq )
#define break_ptr (G1.break_ptr )
#define continue_ptr (G1.continue_ptr)
#define iF (G1.iF )
#define ahash (G1.ahash )
#define fnhash (G1.fnhash )
#define vhash (G1.vhash )
/* fdhash is the same table as ahash */
#define fdhash ahash



#define g_progname (G1.g_progname )
#define g_lineno (G1.g_lineno )
#define nfields (G1.nfields )
#define maxfields (G1.maxfields )
#define Fields (G1.Fields )
#define g_pos (G1.g_pos )
#define g_saved_ch (G1.g_saved_ch )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
#define nextfile (G1.nextfile )
#define is_f0_split (G1.is_f0_split )
#define t_rollback (G1.t_rollback )
#define t_info (G.t_info )
#define t_tclass (G.t_tclass )
#define t_string (G.t_string )
#define t_lineno (G.t_lineno )
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
#define g_buf (G.g_buf )
/* Allocate both global structs zero-filled in one block, point
 * ptr_to_globals past the first one, and set the two non-zero initial
 * values: the "previous token" class starts as TC_NEWLINE and the seed
 * starts as 1. */
#define INIT_G() do { \
 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
 t_tclass = TC_NEWLINE; \
 G.evaluate__seed = 1; \
} while (0)
655
/* Error message texts; they are passed to syntax_error(), which prefixes
 * them with the program name and line number. */
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
666
667static int awk_exit(void) NORETURN;
668
669static void syntax_error(const char *message) NORETURN;
670static void syntax_error(const char *message)
671{
672 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
673}
674
675
676
/*
 * Compute the (unreduced) hash of a NUL-terminated name.
 * For each character c: h = c + h * 64 - h, in unsigned arithmetic.
 * Callers reduce the result modulo the bucket count.
 */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = (unsigned)*p + (h << 6) - h;
	return h;
}
685
686
687static xhash *hash_init(void)
688{
689 xhash *newhash;
690
691 newhash = xzalloc(sizeof(*newhash));
692 newhash->csize = FIRST_PRIME;
693 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
694
695 return newhash;
696}
697
698static void hash_clear(xhash *hash)
699{
700 unsigned i;
701 hash_item *hi, *thi;
702
703 for (i = 0; i < hash->csize; i++) {
704 hi = hash->items[i];
705 while (hi) {
706 thi = hi;
707 hi = hi->next;
708
709 free(thi->data.v.string);
710 free(thi);
711 }
712 hash->items[i] = NULL;
713 }
714 hash->glen = hash->nel = 0;
715}
716
#if 0
/* Currently compiled out: release all entries, the bucket array and the
 * table header itself. */
static void hash_free(xhash *hash)
{
	hash_item **buckets = hash->items;

	hash_clear(hash);
	free(buckets);
	free(hash);
}
#endif
725
726
727static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
728{
729 hash_item *hi;
730
731 hi = hash->items[idx % hash->csize];
732 while (hi) {
733 if (strcmp(hi->name, name) == 0)
734 return &hi->data;
735 hi = hi->next;
736 }
737 return NULL;
738}
739
740static void *hash_search(xhash *hash, const char *name)
741{
742 return hash_search3(hash, name, hashidx(name));
743}
744
745
746static void hash_rebuild(xhash *hash)
747{
748 unsigned newsize, i, idx;
749 hash_item **newitems, *hi, *thi;
750
751 if (hash->nprime == ARRAY_SIZE(PRIMES))
752 return;
753
754 newsize = PRIMES[hash->nprime++];
755 newitems = xzalloc(newsize * sizeof(newitems[0]));
756
757 for (i = 0; i < hash->csize; i++) {
758 hi = hash->items[i];
759 while (hi) {
760 thi = hi;
761 hi = thi->next;
762 idx = hashidx(thi->name) % newsize;
763 thi->next = newitems[idx];
764 newitems[idx] = thi;
765 }
766 }
767
768 free(hash->items);
769 hash->csize = newsize;
770 hash->items = newitems;
771}
772
773
774static void *hash_find(xhash *hash, const char *name)
775{
776 hash_item *hi;
777 unsigned idx;
778 int l;
779
780 idx = hashidx(name);
781 hi = hash_search3(hash, name, idx);
782 if (!hi) {
783 if (++hash->nel > hash->csize * 8)
784 hash_rebuild(hash);
785
786 l = strlen(name) + 1;
787 hi = xzalloc(sizeof(*hi) + l);
788 strcpy(hi->name, name);
789
790 idx = idx % hash->csize;
791 hi->next = hash->items[idx];
792 hash->items[idx] = hi;
793 hash->glen += l;
794 }
795 return &hi->data;
796}
797
/* Typed find-or-create wrappers around hash_find(): findvar() works on a
 * caller-supplied table, the others on vhash, fdhash and fnhash. */
#define findvar(hash, name) ((var*) hash_find((hash), (name)))
#define newvar(name) ((var*) hash_find(vhash, (name)))
#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name) ((func*) hash_find(fnhash, (name)))
802
803static void hash_remove(xhash *hash, const char *name)
804{
805 hash_item *hi, **phi;
806
807 phi = &hash->items[hashidx(name) % hash->csize];
808 while (*phi) {
809 hi = *phi;
810 if (strcmp(hi->name, name) == 0) {
811 hash->glen -= (strlen(name) + 1);
812 hash->nel--;
813 *phi = hi->next;
814 free(hi);
815 break;
816 }
817 phi = &hi->next;
818 }
819}
820
821
822
823static char *skip_spaces(char *p)
824{
825 for (;;) {
826 if (*p == '\\' && p[1] == '\n') {
827 p++;
828 t_lineno++;
829 } else if (*p != ' ' && *p != '\t') {
830 break;
831 }
832 p++;
833 }
834 return p;
835}
836
837
/*
 * Return the NUL-terminated word *s points at and move *s to the byte
 * just past that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
847
/*
 * Fetch one logical character from *s and advance *s past it.
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * returns a backslash without consuming anything, the character after
 * the backslash is returned as is (and skipped, unless it is NUL); a
 * backslash followed by a newline is dropped altogether and the scan
 * continues with the next character.
 */
static char nextchar(char **s)
{
	for (;;) {
		char ch = *(*s)++;
		char *after_first = *s;

		if (ch != '\\')
			return ch;

		ch = bb_process_escape_sequence((const char**)s);
		if (ch != '\\' || *s != after_first)
			return ch;

		/* escape was not consumed: take the following char literally */
		ch = **s;
		if (ch == '\0')
			return ch;
		(*s)++;
		if (ch != '\n')
			return ch;
		/* backslash-newline: read the next character instead */
	}
}
870
871
872
/*
 * Rewrite the string at s1 in place, replacing escape sequences by the
 * characters nextchar() yields for them. The result is NUL-terminated.
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char *dst = s1;
	char ch;

	do {
		ch = nextchar(&src);
		*dst++ = ch;
	} while (ch != '\0');
}
879
880static ALWAYS_INLINE int isalnum_(int c)
881{
882 return (isalnum(c) || c == '_');
883}
884
/*
 * Parse a floating-point number at *pp with strtod() and move *pp to the
 * first character that was not consumed.
 */
static double my_strtod(char **pp)
{
	return strtod(*pp, pp);
}
#if ENABLE_DESKTOP
/*
 * Like my_strtod(), but text starting with "0x"/"0X" or with '0' followed
 * by a digit is first read with strtoull() in base 0 (hex or octal).
 * The integer result is used for hex input, and for octal-looking input
 * unless parsing stopped at a digit or a '.', in which case the text is
 * re-read from the start as a floating-point number.
 */
static double my_strtod_or_hexoct(char **pp)
{
	char *start = *pp;

	if (start[0] == '0') {
		char second = (start[1] | 0x20); /* folds 'X' to 'x' */
		int is_hex = (second == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ull = strtoull(start, pp, 0);
			char stop;

			if (is_hex)
				return ull;
			stop = **pp;
			if (stop != '.' && !isdigit(stop))
				return ull;
			/* stopped at a digit (8 or 9) or at '.': fall back to strtod */
		}
	}
	return strtod(start, pp);
}
#else
# define my_strtod_or_hexoct(p) my_strtod(p)
#endif
916
917
918
919static void fmt_num(const char *format, double n)
920{
921 if (n == (long long)n) {
922 snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
923 } else {
924 const char *s = format;
925 char c;
926
927 do { c = *s; } while (c && *++s);
928 if (strchr("diouxX", c)) {
929 snprintf(g_buf, MAXVARFMT, format, (int)n);
930 } else if (strchr("eEfFgGaA", c)) {
931 snprintf(g_buf, MAXVARFMT, format, n);
932 } else {
933 syntax_error(EMSG_INV_FMT);
934 }
935 }
936}
937
938static xhash *iamarray(var *a)
939{
940 while (a->type & VF_CHILD)
941 a = a->x.parent;
942
943 if (!(a->type & VF_ARRAY)) {
944 a->type |= VF_ARRAY;
945 a->x.array = hash_init();
946 }
947 return a->x.array;
948}
949
/* Emptying an array is the same as emptying its element hash. */
#define clear_array(array) hash_clear(array)
951
952
953static var *clrvar(var *v)
954{
955 if (!(v->type & VF_FSTR))
956 free(v->string);
957
958 v->type &= VF_DONTTOUCH;
959 v->type |= VF_DIRTY;
960 v->string = NULL;
961 return v;
962}
963
964static void handle_special(var *);
965
966
967static var *setvar_p(var *v, char *value)
968{
969 clrvar(v);
970 v->string = value;
971 handle_special(v);
972 return v;
973}
974
975
976static var *setvar_s(var *v, const char *value)
977{
978 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
979}
980
981
982static var *setvar_u(var *v, const char *value)
983{
984 v = setvar_s(v, value);
985 v->type |= VF_USER;
986 return v;
987}
988
989
990static void setari_u(var *a, int idx, const char *s)
991{
992 var *v;
993
994 v = findvar(iamarray(a), itoa(idx));
995 setvar_u(v, s);
996}
997
998
999static var *setvar_i(var *v, double value)
1000{
1001 clrvar(v);
1002 v->type |= VF_NUMBER;
1003 v->number = value;
1004 handle_special(v);
1005 return v;
1006}
1007
1008static const char *getvar_s(var *v)
1009{
1010
1011 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
1012 fmt_num(getvar_s(intvar[CONVFMT]), v->number);
1013 v->string = xstrdup(g_buf);
1014 v->type |= VF_CACHED;
1015 }
1016 return (v->string == NULL) ? "" : v->string;
1017}
1018
1019static double getvar_i(var *v)
1020{
1021 char *s;
1022
1023 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
1024 v->number = 0;
1025 s = v->string;
1026 if (s && *s) {
1027 debug_printf_eval("getvar_i: '%s'->", s);
1028 v->number = my_strtod(&s);
1029
1030 debug_printf_eval("%f (s:'%s')\n", v->number, s);
1031 if (v->type & VF_USER) {
1032
1033 s = skip_spaces(s);
1034 if (*s != '\0')
1035 v->type &= ~VF_USER;
1036 }
1037 } else {
1038 debug_printf_eval("getvar_i: '%s'->zero\n", s);
1039 v->type &= ~VF_USER;
1040 }
1041 v->type |= VF_CACHED;
1042 }
1043 debug_printf_eval("getvar_i: %f\n", v->number);
1044 return v->number;
1045}
1046
1047
1048static unsigned long getvar_i_int(var *v)
1049{
1050 double d = getvar_i(v);
1051
1052
1053
1054 if (d >= 0)
1055 return (unsigned long)d;
1056
1057 return - (long) (unsigned long) (-d);
1058}
1059
1060static var *copyvar(var *dest, const var *src)
1061{
1062 if (dest != src) {
1063 clrvar(dest);
1064 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
1065 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
1066 dest->number = src->number;
1067 if (src->string)
1068 dest->string = xstrdup(src->string);
1069 }
1070 handle_special(dest);
1071 return dest;
1072}
1073
1074static var *incvar(var *v)
1075{
1076 return setvar_i(v, getvar_i(v) + 1.0);
1077}
1078
1079
1080static int is_numeric(var *v)
1081{
1082 getvar_i(v);
1083 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
1084}
1085
1086
1087static int istrue(var *v)
1088{
1089 if (is_numeric(v))
1090 return (v->number != 0);
1091 return (v->string && v->string[0]);
1092}
1093
1094
1095
1096
1097
1098
1099
/*
 * Tokenizer. Reads the next token from the program text at g_pos and
 * returns its class (a TC_* bit), which is also stored in t_tclass.
 * Depending on the class, t_info, t_string or t_double describe the
 * token. 'expected' is the set of acceptable classes; any other token
 * ends in syntax_error().
 *
 * Instead of reading, the function returns the current token again after
 * rollback_token(), or the token that was held back on the previous call
 * while an implicit concatenation operator was returned.
 *
 * The program text is modified in place: string and regex bodies are
 * unescaped over themselves, and names are NUL-terminated.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G1.next_token__concat_inserted)
#define save_tclass (G1.next_token__save_tclass)
#define save_info (G1.next_token__save_info)

 char *p;
 const char *tl;
 const uint32_t *ti;
 uint32_t tc, last_token_class;

 last_token_class = t_tclass; /* class of the previously returned token */

 debug_printf_parse("%s() expected(%x):", __func__, expected);
 debug_parse_print_tc(expected);
 debug_printf_parse("\n");

 if (t_rollback) {
 debug_printf_parse("%s: using rolled-back token\n", __func__);
 t_rollback = FALSE;
 } else if (concat_inserted) {
 debug_printf_parse("%s: using concat-inserted token\n", __func__);
 concat_inserted = FALSE;
 t_tclass = save_tclass;
 t_info = save_info;
 } else {
 p = g_pos;
 /* put back the character that was replaced by NUL after a name */
 if (g_saved_ch != '\0') {
 *p = g_saved_ch;
 g_saved_ch = '\0';
 }
 readnext:
 p = skip_spaces(p);
 g_lineno = t_lineno;
 /* a comment runs up to (not including) the end of the line */
 if (*p == '#')
 while (*p != '\n' && *p != '\0')
 p++;

 if (*p == '\0') {
 tc = TC_EOF;
 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
 } else if (*p == '"') {
 /* string literal: unescape it in place */
 char *s = t_string = ++p;
 while (*p != '"') {
 char *pp;
 if (*p == '\0' || *p == '\n')
 syntax_error(EMSG_UNEXP_EOS);
 pp = p;
 *s++ = nextchar(&pp);
 p = pp;
 }
 p++;
 *s = '\0';
 tc = TC_STRING;
 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
 } else if ((expected & TC_REGEXP) && *p == '/') {
 /* regex literal, recognized only where a regex is expected */
 char *s = t_string = ++p;
 while (*p != '/') {
 if (*p == '\0' || *p == '\n')
 syntax_error(EMSG_UNEXP_EOS);
 *s = *p++;
 if (*s++ == '\\') {
 char *pp = p;
 /* overwrite the stored backslash with the escape's value */
 s[-1] = bb_process_escape_sequence((const char **)&pp);
 /* an escaped backslash is kept as two backslashes */
 if (*p == '\\')
 *s++ = '\\';
 /* escape not consumed: copy the next character as is */
 if (pp == p)
 *s++ = *p++;
 else
 p = pp;
 }
 }
 p++;
 *s = '\0';
 tc = TC_REGEXP;
 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);

 } else if (*p == '.' || isdigit(*p)) {
 /* numeric constant */
 char *pp = p;
 t_double = my_strtod_or_hexoct(&pp);

 p = pp;
 if (*p == '.')
 syntax_error(EMSG_UNEXP_TOKEN);
 tc = TC_NUMBER;
 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
 } else {
 char *end_of_name;

 if (*p == '\n')
 t_lineno++;

 /* search tokenlist[]; tc tracks the class of the current group
 * and ti the matching tokeninfo[] entry */
 tl = tokenlist;
 tc = 0x00000001;
 ti = tokeninfo;
 while (*tl) {
 int l = (unsigned char) *tl++;
 if (l == (unsigned char) NTCC) {
 tc <<= 1;
 continue;
 }

 /* accept if the class is expected (keywords and newline are
 * always tried), the text matches, and a keyword is not
 * just the prefix of a longer name */

 if ((tc & (expected | TS_WORD | TC_NEWLINE))
 && strncmp(p, tl, l) == 0
 && !((tc & TS_WORD) && isalnum_(p[l]))
 ) {

 t_info = *ti;
 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
 p += l;
 goto token_found;
 }
 ti++;
 tl += l;
 }

 /* not a known token: it must be a name */

 if (!isalnum_(*p))
 syntax_error(EMSG_UNEXP_TOKEN);

 t_string = p;
 while (isalnum_(*p))
 p++;
 end_of_name = p;

 if (last_token_class == TC_FUNCDECL)
 /* spaces are allowed between a declared function's name and '(' */
 p = skip_spaces(p);
 else if (expected & TC_ARRAY) {
 /* spaces are allowed between an array name and '[' */
 char *s = skip_spaces(p);
 if (*s == '[')
 p = s;
 }

 /* otherwise whitespace after the name is not skipped, so
 * "name (" is a variable followed by '(' and not a call */






 if (*p == '(') {
 p++;
 tc = TC_FUNCTION;
 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
 } else if (*p == '[') {
 p++;
 tc = TC_ARRAY;
 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
 } else {
 tc = TC_VARIABLE;
 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
 if (end_of_name == p) {
 /* the NUL written below lands on the first character of
 * the next token: remember it so that the next call can
 * restore it */


 g_saved_ch = *end_of_name;




 }
 }
 *end_of_name = '\0'; /* terminate t_string */
 }
 token_found:
 g_pos = p;

 /* a newline right after a token from TS_NOTERM is ignored */
 if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
 goto readnext;

 /* Insert an implicit concatenation operator between two adjacent
 * operands: hold the token just read back for the next call and
 * return the operator now. "length (" is excluded. */
 debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
 (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
 !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
 if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN)
 ) {
 concat_inserted = TRUE;
 save_tclass = tc;
 save_info = t_info;
 tc = TC_BINOPX;
 t_info = OC_CONCAT | SS | P(35);
 }

 t_tclass = tc;
 debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
 }

 /* the message depends on whether the previous token ended a line/input */
 if (!(t_tclass & expected)) {
 syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
 }

 debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
 debug_parse_print_tc(t_tclass);
 debug_printf_parse("\n");

 return t_tclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
}
1314
1315static ALWAYS_INLINE void rollback_token(void)
1316{
1317 t_rollback = TRUE;
1318}
1319
1320static node *new_node(uint32_t info)
1321{
1322 node *n;
1323
1324 n = xzalloc(sizeof(node));
1325 n->info = info;
1326 n->lineno = g_lineno;
1327 return n;
1328}
1329
1330static void mk_re_node(const char *s, node *n, regex_t *re)
1331{
1332 n->info = TI_REGEXP;
1333 n->l.re = re;
1334 n->r.ire = re + 1;
1335 xregcomp(re, s, REG_EXTENDED);
1336 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1337}
1338
1339static node *parse_expr(uint32_t);
1340
1341static node *parse_lrparen_list(void)
1342{
1343 next_token(TC_LPAREN);
1344 return parse_expr(TC_RPAREN);
1345}
1346
1347
1348
/*
 * Parse an expression that ends with one of the token classes in term_tc
 * and return the root of the subtree built for it. The terminating token is
 * consumed by the loop condition. The result is NULL when the terminator is
 * met before anything was attached.
 */
static node *parse_expr(uint32_t term_tc)
{
	node sn;		/* stack sentinel: its r.n ends up being the tree root */
	node *cn = &sn;		/* most recently attached node */
	node *vn, *glptr;	/* glptr: last seen getline node, candidate for "< file" */
	uint32_t tc, expected_tc;
	var *v;

	debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
	debug_parse_print_tc(term_tc);
	debug_printf_parse("\n");

	sn.info = PRIMASK;
	sn.r.n = sn.a.n = glptr = NULL;
	expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;

	while (!((tc = next_token(expected_tc)) & term_tc)) {

		if (glptr && (t_info == TI_LESS)) {
			/* "<" right after a getline node: attach the redirection
			 * to that node instead of treating it as a comparison */
			debug_printf_parse("%s: input redir\n", __func__);
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			expected_tc = TS_OPERAND | TS_UOPPRE;
			glptr = NULL;
			continue;
		}
		if (tc & (TS_BINOP | TC_UOPPOST)) {
			debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
			/* Binary or postfix operator: walk up the a.n links while
			 * the priority comparison below holds (or while chaining
			 * TI_COLON onto TI_COLON) to find the attachment point */
			vn = cn;
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			    || ((t_info == vn->info) && t_info == TI_COLON)
			) {
				vn = vn->a.n;
				if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
			}
			if (t_info == TI_TERNARY)
				/* NOTE(review): the reason for this priority bump is not visible here */
				t_info += P(6);
			/* splice the new operator node in between vn and its parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TS_BINOP) {
				/* binary: what was parsed so far becomes the left operand */
				cn->l.n = vn;
				expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
				if (t_info == TI_PGETLINE) {
					/* "cmd | getline": the getline keyword must follow */
					next_token(TC_GETLINE);
					/* clear the priority bits of this node */
					cn->info &= ~PRIMASK;
					expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
				}
			} else {
				/* postfix unary: the operand hangs off the right link */
				cn->r.n = vn;
				expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
			}
			vn->a.n = cn;
			continue;
		}

		debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
		/* Operand or prefix-unary operator: attach it below the
		 * current node as its right child */
		vn = cn;
		cn = vn->r.n = new_node(t_info);
		cn->a.n = vn;

		expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
		if (t_info == TI_PREINC || t_info == TI_PREDEC)
			expected_tc = TS_LVALUE | TC_UOPPRE1;

		/* a prefix operator needs nothing more; operands are filled in below */
		if (!(tc & (TS_OPERAND | TC_REGEXP)))
			continue;

		debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
		expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
		/* The switch compares tc for equality, so only single
		 * token classes (TC_xyz) can be matched here */
		switch (tc) {
		case TC_VARIABLE:
		case TC_ARRAY:
			debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
			cn->info = OC_VAR;
			/* a name found in ahash is referenced by its index (OC_FNARG) */
			v = hash_search(ahash, t_string);
			if (v != NULL) {
				cn->info = OC_FNARG;
				cn->l.aidx = v->x.aidx;
			} else {
				cn->l.v = newvar(t_string);
			}
			if (tc & TC_ARRAY) {
				/* the subscript expression goes into the right link */
				cn->info |= xS;
				cn->r.n = parse_expr(TC_ARRTERM);
			}
			break;

		case TC_NUMBER:
		case TC_STRING:
			debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
			/* constants are stored as anonymous variables */
			cn->info = OC_VAR;
			v = cn->l.v = xzalloc(sizeof(var));
			if (tc & TC_NUMBER)
				setvar_i(v, t_double);
			else {
				setvar_s(v, t_string);
				/* a postfix operator may not follow a string constant */
				expected_tc &= ~TC_UOPPOST;
			}
			break;

		case TC_REGEXP:
			debug_printf_parse("%s: TC_REGEXP\n", __func__);
			/* room for both the case-sensitive and the REG_ICASE regex */
			mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
			break;

		case TC_FUNCTION:
			debug_printf_parse("%s: TC_FUNCTION\n", __func__);
			cn->info = OC_FUNC;
			cn->r.f = newfunc(t_string);
			/* argument list, up to the closing paren */
			cn->l.n = parse_expr(TC_RPAREN);
			break;

		case TC_LPAREN:
			debug_printf_parse("%s: TC_LPAREN\n", __func__);
			/* replace the node created above with the parenthesized subtree */
			cn = vn->r.n = parse_expr(TC_RPAREN);
			if (!cn)
				syntax_error("Empty sequence");
			cn->a.n = vn;
			break;

		case TC_GETLINE:
			debug_printf_parse("%s: TC_GETLINE\n", __func__);
			/* remember the node so that a following "<" can attach to it */
			glptr = cn;
			expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
			break;

		case TC_BUILTIN:
			debug_printf_parse("%s: TC_BUILTIN\n", __func__);
			cn->l.n = parse_lrparen_list();
			break;

		case TC_LENGTH:
			debug_printf_parse("%s: TC_LENGTH\n", __func__);
			/* the parenthesized argument is optional: peek at the next token */
			tc = next_token(TC_LPAREN
				| TC_SEMICOL
				| TC_NEWLINE
				| TC_RBRACE
				| TC_BINOPX
				| TC_COMMA
			);
			if (tc != TC_LPAREN)
				rollback_token();
			else {
				/* the opening paren is already consumed */
				cn->l.n = parse_expr(TC_RPAREN);
			}
			break;
		}
	}

	debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
	return sn.r.n;
}
1518
1519
1520static node *chain_node(uint32_t info)
1521{
1522 node *n;
1523
1524 if (!seq->first)
1525 seq->first = seq->last = new_node(0);
1526
1527 if (seq->programname != g_progname) {
1528 seq->programname = g_progname;
1529 n = chain_node(OC_NEWSOURCE);
1530 n->l.new_progname = g_progname;
1531 }
1532
1533 n = seq->last;
1534 n->info = info;
1535 seq->last = n->a.n = new_node(OC_DONE);
1536
1537 return n;
1538}
1539
1540static void chain_expr(uint32_t info)
1541{
1542 node *n;
1543
1544 n = chain_node(info);
1545
1546 n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1547 if ((info & OF_REQUIRED) && !n->l.n)
1548 syntax_error(EMSG_TOO_FEW_ARGS);
1549
1550 if (t_tclass & TC_RBRACE)
1551 rollback_token();
1552}
1553
1554static void chain_group(void);
1555
1556static node *chain_loop(node *nn)
1557{
1558 node *n, *n2, *save_brk, *save_cont;
1559
1560 save_brk = break_ptr;
1561 save_cont = continue_ptr;
1562
1563 n = chain_node(OC_BR | Vx);
1564 continue_ptr = new_node(OC_EXEC);
1565 break_ptr = new_node(OC_EXEC);
1566 chain_group();
1567 n2 = chain_node(OC_EXEC | Vx);
1568 n2->l.n = nn;
1569 n2->a.n = n;
1570 continue_ptr->a.n = n2;
1571 break_ptr->a.n = n->r.n = seq->last;
1572
1573 continue_ptr = save_cont;
1574 break_ptr = save_brk;
1575
1576 return n;
1577}
1578
1579static void chain_until_rbrace(void)
1580{
1581 uint32_t tc;
1582 while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1583 debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1584 if (tc == TC_NEWLINE)
1585 continue;
1586 rollback_token();
1587 chain_group();
1588 }
1589 debug_printf_parse("%s: TC_RBRACE\n", __func__);
1590}
1591
1592
/*
 * Parse one statement group - a "{ ... }" block, an expression statement,
 * or a keyword statement - and append the nodes for it to the current
 * sequence.
 */
static void chain_group(void)
{
	uint32_t tc;
	node *n, *n2, *n3;

	/* skip leading newlines */
	do {
		tc = next_token(TS_GRPSEQ);
	} while (tc == TC_NEWLINE);

	if (tc == TC_LBRACE) {
		debug_printf_parse("%s: TC_LBRACE\n", __func__);
		chain_until_rbrace();
		return;
	}
	if (tc & (TS_OPSEQ | TC_SEMICOL)) {
		/* expression statement (possibly empty): re-read it via chain_expr() */
		debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
		rollback_token();
		chain_expr(OC_EXEC | Vx);
		return;
	}

	/* everything else is a keyword statement, selected by t_info */
	debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
	switch (t_info & OPCLSMASK) {
	case ST_IF:
		debug_printf_parse("%s: ST_IF\n", __func__);
		n = chain_node(OC_BR | Vx);	/* the conditional branch */
		n->l.n = parse_lrparen_list();
		chain_group();			/* "then" body */
		n2 = chain_node(OC_EXEC);	/* node chained right after the body */
		n->r.n = seq->last;		/* false condition continues here */
		if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
			chain_group();		/* "else" body */
			n2->a.n = seq->last;	/* after the "then" body, skip the "else" body */
		} else {
			rollback_token();
		}
		break;

	case ST_WHILE:
		debug_printf_parse("%s: ST_WHILE\n", __func__);
		n2 = parse_lrparen_list();
		n = chain_loop(NULL);
		n->l.n = n2;
		break;

	case ST_DO:
		debug_printf_parse("%s: ST_DO\n", __func__);
		n2 = chain_node(OC_EXEC);
		n = chain_loop(NULL);
		/* the first pass enters the body directly, bypassing the branch node */
		n2->a.n = n->a.n;
		next_token(TC_WHILE);
		n->l.n = parse_lrparen_list();
		break;

	case ST_FOR:
		debug_printf_parse("%s: ST_FOR\n", __func__);
		next_token(TC_LPAREN);
		n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
		if (t_tclass & TC_RPAREN) {
			/* "for (... in ...)": the only form closed by ")" right away */
			if (!n2 || n2->info != TI_IN)
				syntax_error(EMSG_UNEXP_TOKEN);
			n = chain_node(OC_WALKINIT | VV);
			n->l.n = n2->l.n;
			n->r.n = n2->r.n;
			n = chain_loop(NULL);
			n->info = OC_WALKNEXT | Vx;
			n->l.n = n2->l.n;
		} else {
			/* "for (init; cond; step)" */
			n = chain_node(OC_EXEC | Vx);
			n->l.n = n2;
			n2 = parse_expr(TC_SEMICOL);
			n3 = parse_expr(TC_RPAREN);
			n = chain_loop(n3);
			n->l.n = n2;
			/* no condition: replace the branch with a plain OC_EXEC */
			if (!n2)
				n->info = OC_EXEC;
		}
		break;

	case OC_PRINT:
	case OC_PRINTF:
		debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
		n = chain_node(t_info);
		n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
		if (t_tclass & TC_OUTRDR) {
			/* output redirection: merge the redirection token's info
			 * into the node and parse the target expression */
			n->info |= t_info;
			n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
		}
		if (t_tclass & TC_RBRACE)
			rollback_token();
		break;

	case OC_BREAK:
		debug_printf_parse("%s: OC_BREAK\n", __func__);
		n = chain_node(OC_EXEC);
		if (!break_ptr)
			syntax_error("'break' not in a loop");
		n->a.n = break_ptr;
		chain_expr(t_info);
		break;

	case OC_CONTINUE:
		debug_printf_parse("%s: OC_CONTINUE\n", __func__);
		n = chain_node(OC_EXEC);
		if (!continue_ptr)
			syntax_error("'continue' not in a loop");
		n->a.n = continue_ptr;
		chain_expr(t_info);
		break;

	/* every other statement keyword: chain it together with its expression */
	default:
		debug_printf_parse("%s: default\n", __func__);
		chain_expr(t_info);
	}
}
1710
/*
 * Parse the program text at p. Top-level items are appended to beginseq,
 * endseq, mainseq or the body of the function being declared.
 */
static void parse_program(char *p)
{
	debug_printf_parse("%s()\n", __func__);

	g_pos = p;
	t_lineno = 1;
	for (;;) {
		uint32_t tclass;

		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
			| TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
 got_tok:
		if (tclass == TC_EOF) {
			debug_printf_parse("%s: TC_EOF\n", __func__);
			break;
		}
		if (tclass == TC_NEWLINE) {
			debug_printf_parse("%s: TC_NEWLINE\n", __func__);
			continue;
		}
		if (tclass == TC_BEGIN) {
			debug_printf_parse("%s: TC_BEGIN\n", __func__);
			seq = &beginseq;
			/* only "{" is accepted here, not a newline */
			next_token(TC_LBRACE);
			chain_until_rbrace();
			goto next_tok;
		}
		if (tclass == TC_END) {
			debug_printf_parse("%s: TC_END\n", __func__);
			seq = &endseq;
			/* only "{" is accepted here, not a newline */
			next_token(TC_LBRACE);
			chain_until_rbrace();
			goto next_tok;
		}
		if (tclass == TC_FUNCDECL) {
			func *f;

			debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
			next_token(TC_FUNCTION);
			f = newfunc(t_string);
			if (f->defined)
				syntax_error("Duplicate function");
			f->defined = 1;
			/* Parameter list: "ARG, ARG, ...)" or just ")".
			 * NOTE(review): presumably the TC_FUNCTION token already
			 * covered the opening paren - no "(" is requested here */
			for (;;) {
				var *v;
				if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
					if (f->nargs == 0)
						break; /* empty parameter list */
					/* ")" right after a comma */
					syntax_error(EMSG_UNEXP_TOKEN);
				}
				/* register the parameter name with its positional index */
				v = findvar(ahash, t_string);
				v->x.aidx = f->nargs++;
				/* a parameter is followed by ")" or by one comma */
				if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
					break;
				/* it was a comma: go read the next parameter */
			}
			seq = &f->body;
			/* newlines are allowed before the opening brace of the body */
			while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
				continue;
			chain_until_rbrace();
			/* parameter names are only known inside this function */
			hash_clear(ahash);
			goto next_tok;
		}
		seq = &mainseq;
		if (tclass & TS_OPSEQ) {
			node *cn;

			/* "PATTERN" or "PATTERN { action }" */
			debug_printf_parse("%s: TS_OPSEQ\n", __func__);
			rollback_token();
			cn = chain_node(OC_TEST);
			cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
			if (t_tclass == TC_LBRACE) {
				debug_printf_parse("%s: TC_LBRACE\n", __func__);
				chain_until_rbrace();
			} else {
				/* pattern without an action: chain a bare OC_PRINT */
				debug_printf_parse("%s: !TC_LBRACE\n", __func__);
				chain_node(OC_PRINT);
			}
			/* where to continue when the pattern does not match */
			cn->r.n = mainseq.last;
			goto next_tok;
		}
		/* the only remaining class: an action block without a pattern */
		debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
		chain_until_rbrace();
 next_tok:
		/* same token set as at the top of the loop, plus TC_SEMICOL */
		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
			| TC_EOF | TC_NEWLINE | TC_SEMICOL);
		/* One ";" is swallowed after a top-level item: on ";" fall
		 * through to the loop top, which does not accept another ";" */
		if (tclass != TC_SEMICOL)
			goto got_tok; /* use this token */
		/* else: the semicolon is eaten, loop back for the next token */
	}
}
1819
1820
1821
1822
1823static var *nvalloc(int sz)
1824{
1825 return xzalloc(sz * sizeof(var));
1826}
1827
1828static void nvfree(var *v, int sz)
1829{
1830 var *p = v;
1831
1832 while (--sz >= 0) {
1833 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1834 clear_array(iamarray(p));
1835 free(p->x.array->items);
1836 free(p->x.array);
1837 }
1838 if (p->type & VF_WALK) {
1839 walker_list *n;
1840 walker_list *w = p->x.walker;
1841 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1842 p->x.walker = NULL;
1843 while (w) {
1844 n = w->prev;
1845 debug_printf_walker(" free(%p)\n", w);
1846 free(w);
1847 w = n;
1848 }
1849 }
1850 clrvar(p);
1851 p++;
1852 }
1853
1854 free(v);
1855}
1856
1857static node *mk_splitter(const char *s, tsplitter *spl)
1858{
1859 regex_t *re, *ire;
1860 node *n;
1861
1862 re = &spl->re[0];
1863 ire = &spl->re[1];
1864 n = &spl->n;
1865 if (n->info == TI_REGEXP) {
1866 regfree(re);
1867 regfree(ire);
1868 }
1869 if (s[0] && s[1]) {
1870 mk_re_node(s, n, re);
1871 } else {
1872 n->info = (uint32_t) s[0];
1873 }
1874
1875 return n;
1876}
1877
1878static var *evaluate(node *, var *);
1879
1880
1881
1882
1883
1884static regex_t *as_regex(node *op, regex_t *preg)
1885{
1886 int cflags;
1887 const char *s;
1888
1889 if (op->info == TI_REGEXP) {
1890 return icase ? op->r.ire : op->l.re;
1891 }
1892
1893
1894#define TMPVAR (&G.as_regex__tmpvar)
1895
1896
1897
1898
1899 s = getvar_s(evaluate(op, TMPVAR));
1900
1901 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1902
1903
1904
1905
1906
1907 if (regcomp(preg, s, cflags)) {
1908 cflags &= ~REG_EXTENDED;
1909 xregcomp(preg, s, cflags);
1910 }
1911
1912#undef TMPVAR
1913 return preg;
1914}
1915
1916
1917
1918
1919
/*
 * Make sure buffer b can hold more than n bytes and return it.
 * When b is NULL or n >= *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size updated, so at least 80 bytes of slack exist
 * past index n after a growth.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1928
1929
1930static void fsrealloc(int size)
1931{
1932 int i, newsize;
1933
1934 if (size >= maxfields) {
1935
1936 if (size > 0xffffff)
1937 bb_die_memory_exhausted();
1938
1939 i = maxfields;
1940 maxfields = size + 16;
1941
1942 newsize = maxfields * sizeof(Fields[0]);
1943 debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1944 Fields = xrealloc(Fields, newsize);
1945 debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1946
1947
1948 for (; i < maxfields; i++) {
1949 Fields[i].type = VF_SPECIAL;
1950 Fields[i].string = NULL;
1951 }
1952 }
1953
1954 for (i = size; i < nfields; i++) {
1955 clrvar(Fields + i);
1956 }
1957 nfields = size;
1958}
1959
/*
 * regexec() wrapper that refuses an empty match at the very start of s.
 *
 * If the first match is empty and located at offset 0 (for example a
 * pattern like " *" on text not starting with a space), the search is
 * retried from s+1, s+2, ... until a match is found that does not end at
 * the retry position. pmatch[0] is then adjusted to be relative to s.
 *
 * Returns 0 on success (pmatch[0] valid), REG_NOMATCH when the end of the
 * string is reached, or the error code of the initial regexec().
 *
 * Compared to a naive retry loop, two defensive checks are made:
 *  - an empty s returns REG_NOMATCH right away instead of looking at s[1];
 *  - a failing retry regexec() ends the search, because pmatch[] contents
 *    are unspecified after a failed call and must not be inspected.
 */
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
{
	int r = regexec(preg, s, 1, pmatch, 0);

	if (r == 0 && pmatch[0].rm_eo == 0) {
		size_t ofs = 0;

		/* nothing non-empty can match inside an empty string */
		if (!s[0])
			return REG_NOMATCH;
		do {
			ofs++;
			if (!s[ofs])
				return REG_NOMATCH;
			if (regexec(preg, s + ofs, 1, pmatch, 0) != 0)
				return REG_NOMATCH;
		} while (pmatch[0].rm_eo == 0);
		/* the retry matched relative to s+ofs: convert to offsets into s */
		pmatch[0].rm_so += ofs;
		pmatch[0].rm_eo += ofs;
	}
	return r;
}
1982
/*
 * Split string s using splitter node spl.
 * The fields are written back to back, each NUL-terminated, into a newly
 * allocated buffer stored in *slist (the caller frees it). Returns the
 * number of fields.
 */
static int awk_split(const char *s, node *spl, char **slist)
{
	int n;
	char c[4];
	char *s1;

	/* sized for the worst case where every char becomes its own field */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c[0..1]: the separator char (two slots, for the icase variants below);
	 * c[2..3]: extra separator string used only by the regex branch */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';	/* with an empty RS, newline also ends a field */

	n = 0;
	if (spl->info == TI_REGEXP) {	/* regex split */
		if (!*s)
			return n;	/* empty string: zero fields */
		n++;	/* there will be at least one field */
		do {
			int l;
			regmatch_t pmatch[1];

			l = strcspn(s, c+2);	/* length up to the next '\n' (if enabled) or NUL */
			if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
			 && pmatch[0].rm_so <= l
			) {
				/* separator found within this stretch: the field ends where it starts */
				l = pmatch[0].rm_so;
				n++;	/* one more separator seen, so one more field */
			} else {
				/* no separator: take the stretch, stepping over the '\n' if any */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			s1 = mempcpy(s1, s, l);
			*s1++ = '\0';
			s += pmatch[0].rm_eo;
		} while (*s);

		/* the input may end right after a separator: terminate the
		 * (empty) last field explicitly */
		*s1 = '\0';

		return n;
	}
	if (c[0] == '\0') {	/* null separator: every char is a field */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') {	/* single-character separator */
		if (icase) {
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		if (*s1)
			n++;
		/* cut the copy made above in place at every separator */
		while ((s1 = strpbrk(s1, c)) != NULL) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* separator is a single space: fields are runs of non-whitespace */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s)
			break;
		n++;
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}
2064
2065static void split_f0(void)
2066{
2067
2068#define fstrings (G.split_f0__fstrings)
2069
2070 int i, n;
2071 char *s;
2072
2073 if (is_f0_split)
2074 return;
2075
2076 is_f0_split = TRUE;
2077 free(fstrings);
2078 fsrealloc(0);
2079 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
2080 fsrealloc(n);
2081 s = fstrings;
2082 for (i = 0; i < n; i++) {
2083 Fields[i].string = nextword(&s);
2084 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
2085 }
2086
2087
2088 clrvar(intvar[NF]);
2089 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2090 intvar[NF]->number = nfields;
2091#undef fstrings
2092}
2093
2094
2095static void handle_special(var *v)
2096{
2097 int n;
2098 char *b;
2099 const char *sep, *s;
2100 int sl, l, len, i, bsize;
2101
2102 if (!(v->type & VF_SPECIAL))
2103 return;
2104
2105 if (v == intvar[NF]) {
2106 n = (int)getvar_i(v);
2107 if (n < 0)
2108 syntax_error("NF set to negative value");
2109 fsrealloc(n);
2110
2111
2112 sep = getvar_s(intvar[OFS]);
2113 sl = strlen(sep);
2114 b = NULL;
2115 len = 0;
2116 for (i = 0; i < n; i++) {
2117 s = getvar_s(&Fields[i]);
2118 l = strlen(s);
2119 if (b) {
2120 memcpy(b+len, sep, sl);
2121 len += sl;
2122 }
2123 b = qrealloc(b, len+l+sl, &bsize);
2124 memcpy(b+len, s, l);
2125 len += l;
2126 }
2127 if (b)
2128 b[len] = '\0';
2129 setvar_p(intvar[F0], b);
2130 is_f0_split = TRUE;
2131
2132 } else if (v == intvar[F0]) {
2133 is_f0_split = FALSE;
2134
2135 } else if (v == intvar[FS]) {
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146 split_f0();
2147
2148 mk_splitter(getvar_s(v), &fsplitter);
2149 } else if (v == intvar[RS]) {
2150 mk_splitter(getvar_s(v), &rsplitter);
2151 } else if (v == intvar[IGNORECASE]) {
2152 icase = istrue(v);
2153 } else {
2154 n = getvar_i(intvar[NF]);
2155 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
2156
2157 }
2158}
2159
2160
2161static node *nextarg(node **pn)
2162{
2163 node *n;
2164
2165 n = *pn;
2166 if (n && n->info == TI_COMMA) {
2167 *pn = n->r.n;
2168 n = n->l.n;
2169 } else {
2170 *pn = NULL;
2171 }
2172 return n;
2173}
2174
2175static void hashwalk_init(var *v, xhash *array)
2176{
2177 hash_item *hi;
2178 unsigned i;
2179 walker_list *w;
2180 walker_list *prev_walker;
2181
2182 if (v->type & VF_WALK) {
2183 prev_walker = v->x.walker;
2184 } else {
2185 v->type |= VF_WALK;
2186 prev_walker = NULL;
2187 }
2188 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
2189
2190 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1);
2191 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
2192 w->cur = w->end = w->wbuf;
2193 w->prev = prev_walker;
2194 for (i = 0; i < array->csize; i++) {
2195 hi = array->items[i];
2196 while (hi) {
2197 w->end = stpcpy(w->end, hi->name) + 1;
2198 hi = hi->next;
2199 }
2200 }
2201}
2202
2203static int hashwalk_next(var *v)
2204{
2205 walker_list *w = v->x.walker;
2206
2207 if (w->cur >= w->end) {
2208 walker_list *prev_walker = w->prev;
2209
2210 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
2211 free(w);
2212 v->x.walker = prev_walker;
2213 return FALSE;
2214 }
2215
2216 setvar_s(v, nextword(&w->cur));
2217 return TRUE;
2218}
2219
2220
2221static int ptest(node *pattern)
2222{
2223
2224
2225
2226
2227 return istrue(evaluate(pattern, &G.ptest__tmpvar));
2228}
2229
2230
/*
 * Read the next record from stream rsm into variable v.
 * The separator text that ended the record is stored in RT.
 * Returns 1 when a record was read, 0 at end of input, -1 after a read
 * error (ERRNO is set in that case).
 */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[1];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	debug_printf_eval("entered %s()\n", __func__);

	/* The stream keeps its own buffer:
	 * m - buffer start, a - offset of the unconsumed data in it,
	 * p - number of unconsumed bytes, size - allocated size */
	fd = fileno(rsm->F);
	m = rsm->buffer;
	a = rsm->adv;
	p = rsm->pos;
	size = rsm->size;
	c = (char) rsplitter.n.info;	/* the separator char when RS is not a regex */
	rp = 0;				/* offset in b where the record text starts */

	if (!m)
		m = qrealloc(m, 256, &size);

	do {
		b = m + a;
		so = eo = p;	/* default: the record is everything buffered, no separator */
		r = 1;
		if (p > 0) {
			if (rsplitter.n.info == TI_REGEXP) {
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* a match ending exactly at the end of the data is
					 * not final yet: go on and read more input */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* search only the part added by the last read */
				s = strchr(b+pp, c);
				/* strchr stops at a NUL inside the data: such a
				 * NUL byte ends the record as well */
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* empty RS: skip leading newlines, then the record
				 * runs up to the next run of two or more newlines */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* no complete record yet: move the pending data (and its
		 * terminating NUL) to the buffer start ... */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		/* ... make room and append more input */
		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* the read returned a negative value: error */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp);	/* keep going for as long as new data arrives */

	if (p == 0) {
		/* nothing to return: r becomes 0 (end of input) or -1 (error) */
		r--;
	} else {
		/* temporarily NUL-terminate the record and the separator in
		 * place in order to copy them out */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* save the buffer state, stepping over the consumed record and separator */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	debug_printf_eval("returning from %s(): %d\n", __func__, r);

	return r;
}
2331
2332
/*
 * Format the argument list n (format string first) printf-style into a
 * newly allocated string and return it.
 * With ENABLE_FEATURE_AWK_GNU_EXTENSIONS the result length is stored in
 * *len when len is not NULL; without it the second parameter is dropped at
 * every call site by the macro below.
 */
#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
# define awk_printf(a, b) awk_printf(a)
#endif
static char *awk_printf(node *n, size_t *len)
{
	char *b;		/* accumulated output */
	char *fmt, *f;		/* private copy of the format, and the cursor in it */
	size_t i;		/* current length of b */

	/* a global temporary receives the evaluated arguments */
#define TMPVAR (&G.awk_printf__tmpvar)
	/* The format is duplicated: TMPVAR is reused for every following
	 * argument, and the format is patched in place below */
	fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));

	b = NULL;
	i = 0;
	while (1) {	/* one conversion specification per iteration */
		char *s;
		char c;
		char sv;
		var *arg;
		size_t slen;

		/* find the '%' that starts the next conversion, or the end */
		s = f;
		while (1) {
			c = *f;
			if (!c)	/* no (more) '%' in the format */
				goto nul;
			f++;
			if (c == '%')
				break;
		}
		/* f is just past the '%' */
		c = *f;
		if (!c)	/* the format ends with a lone '%' */
			goto nul;
		if (c == '%') {	/* "%%" */
			/* emit the text up to and including the first '%' verbatim */
			slen = f - s;
			s = xstrndup(s, slen);
			f++;
			goto append;
		}
		/* skip flags/width/precision up to the conversion letter */
		while (1) {
			if (isalpha(c))
				break;
			if (c == '*')
				syntax_error("%*x formats are not supported");
			c = *++f;
			if (!c) {	/* no letter after the '%' */
				/* entered with c == 0, which ends the outer loop at "tail" */
 nul:
				slen = f - s;
				goto tail;	/* emit the rest of the format as is */
			}
		}
		/* f is at the conversion letter, which is also in c */

		arg = evaluate(nextarg(&n), TMPVAR);

		/* Cut the format right after the conversion letter so that
		 * s is a format with exactly one conversion; the cut-away
		 * char is restored below */
		sv = *++f;
		*f = '\0';
		if (c == 'c') {
			char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
			/* format with a stand-in for NUL so that strlen() gives the real length */
			char *r = xasprintf(s, cc ? cc : '^' );
			slen = strlen(r);
			if (cc == '\0')	/* now put the actual NUL char in place */
				sprintf(r, s, cc);
			s = r;
		} else {
			if (c == 's') {
				s = xasprintf(s, getvar_s(arg));
			} else {
				double d = getvar_i(arg);
				if (strchr("diouxX", c)) {
					/* integer conversions receive the value truncated to int */
					s = xasprintf(s, (int)d);
				} else if (strchr("eEfFgGaA", c)) {
					s = xasprintf(s, d);
				} else {
					/* any other conversion letter is rejected */
					syntax_error(EMSG_INV_FMT);
				}
			}
			slen = strlen(s);
		}
		*f = sv;
 append:
		if (i == 0) {
			/* nothing accumulated so far: adopt the piece as the result */
			b = s;
			i = slen;
			continue;
		}
 tail:
		b = xrealloc(b, i + slen + 1);
		strcpy(b + i, s);
		i += slen;
		if (!c)	/* arrived via "nul": s points into fmt and must not be freed */
			break;
		free(s);
	}

	free(fmt);

#undef TMPVAR

#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
	if (len)
		*len = i;
#endif
	return b;
}
2454
2455
2456
2457
2458
2459
2460
2461
/*
 * Common substitution routine.
 * Matches regex rn against src and replaces the nm-th match with repl,
 * storing the result in dest; returns the number of matches processed.
 * With nm == 0 every match is replaced.
 * A NULL src or dest stands for $0.
 * In repl, "&" inserts the matched text; with subexp != 0 a backslash
 * followed by a digit inserts that subexpression (0 = whole match).
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL;
	residx = 0;
	match_no = 0;
	regexec_flags = 0;
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]);
	replen = strlen(repl);
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		/* copy everything up to the end of the match; room for the
		 * replacement text is reserved at the same time */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs;	/* number of backslashes directly before the current char */

			/* replace: step back over the matched text just copied */
			residx -= (eo - so);
			nbs = 0;
			for (s = repl; *s; s++) {
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* take back the special char and the backslashes
					 * consumed as escapes */
					residx -= ((nbs + 3) >> 1);
					j = 0;
					if (c != '&') {
						j = c - '0';
						nbs++;
					}
					if (nbs % 2) {
						/* escaped: insert the character itself */
						resbuf[residx++] = c;
					} else {
						/* insert the text of (sub)match j */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		/* from now on sp is not at the beginning of the line any more */
		regexec_flags = REG_NOTBOL;
		sp += eo;
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match: copy one char and step over it so that the
			 * loop makes progress.
			 * This store needs no size check of its own: qrealloc()
			 * leaves slack beyond the requested index */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret;
			sp++;
			residx++;
		}
	}

	/* append the unmatched remainder */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	/* resbuf is handed over to the destination variable */
	setvar_p(dest ? dest : intvar[F0], resbuf);
	/* free the regex only when as_regex() compiled it into sreg */
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}
2552
/*
 * Convert a date string of the form "YYYY MM DD HH MM SS [DST]" to a
 * timestamp with mktime().
 * Returns -1 when fewer than six numbers can be read, or when the month is
 * below 1 or the year below 1900. The DST field is optional and defaults to
 * -1.
 */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm when;
	int parsed;

	when.tm_isdst = -1;

	/* the numbers are scanned straight into the int members of struct tm */
	parsed = sscanf(ds, "%u %u %u %u %u %u %d",
		&when.tm_year, &when.tm_mon, &when.tm_mday,
		&when.tm_hour, &when.tm_min, &when.tm_sec,
		&when.tm_isdst);

	if (parsed < 6)
		return -1;
	if ((unsigned)when.tm_mon < 1)
		return -1;
	if ((unsigned)when.tm_year < 1900)
		return -1;

	/* struct tm counts years from 1900 and months from 0 */
	when.tm_year -= 1900;
	when.tm_mon -= 1;

	return mktime(&when);
}
2580
2581
2582static NOINLINE var *do_match(node *an1, const char *as0)
2583{
2584 regmatch_t pmatch[1];
2585 regex_t sreg, *re;
2586 int n, start, len;
2587
2588 re = as_regex(an1, &sreg);
2589 n = regexec(re, as0, 1, pmatch, 0);
2590 if (re == &sreg)
2591 regfree(re);
2592 start = 0;
2593 len = -1;
2594 if (n == 0) {
2595 start = pmatch[0].rm_so + 1;
2596 len = pmatch[0].rm_eo - pmatch[0].rm_so;
2597 }
2598 setvar_i(newvar("RLENGTH"), len);
2599 return setvar_i(newvar("RSTART"), start);
2600}
2601
2602
/*
 * Execute the built-in function described by node op and store its value
 * in res. Returns the variable holding the result: res, except for B_ma,
 * which returns the variable produced by do_match().
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

	var *tmpvars;
	node *an[4];		/* argument nodes */
	var *av[4];		/* evaluated arguments */
	const char *as[4];	/* arguments as strings */
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	tmpvars = nvalloc(4);
#define TMPVAR0 (tmpvars)
#define TMPVAR1 (tmpvars + 1)
#define TMPVAR2 (tmpvars + 2)
#define TMPVAR3 (tmpvars + 3)
#define TMPVAR(i) (tmpvars + (i))
	isr = info = op->info;
	op = op->l.n;

	av[2] = av[3] = NULL;
	/* Collect up to four arguments. isr is shifted right by one bit per
	 * argument, so the two masks below test per-argument flags of the
	 * info word: whether the argument is evaluated at all, and whether
	 * its string value is fetched too */
	for (i = 0; i < 4 && op; i++) {
		an[i] = nextarg(&op);
		if (isr & 0x09000000) {
			av[i] = evaluate(an[i], TMPVAR(i));
			if (isr & 0x08000000)
				as[i] = getvar_s(av[i]);
		}
		isr >>= 1;
	}

	nargs = i;
	/* the two top bits of info give the minimum number of arguments */
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	case B_a2:	/* atan2(arg0, arg1) */
		if (ENABLE_FEATURE_AWK_LIBM)
			setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
		else
			syntax_error(EMSG_NO_MATH);
		break;

	case B_sp: {	/* split string arg0 into array arg1, optional separator arg2 */
		char *s, *s1;

		if (nargs > 2) {
			/* a regex constant is used directly; anything else is
			 * evaluated and turned into a temporary splitter */
			spl = (an[2]->info == TI_REGEXP) ? an[2]
				: mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s;
		clear_array(iamarray(av[1]));
		/* elements are numbered from 1 */
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	case B_ss: {	/* substring of arg0 from 1-based position arg1, optional length arg2 */
		char *s;

		l = strlen(as[0]);
		i = getvar_i(av[1]) - 1;
		/* clamp the start to the string bounds */
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise operations; operands are converted with getvar_i_int() */
	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	case B_lo:
	case B_up: {	/* ASCII-only lower/upper-casing of arg0 */
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			/* true for 'a'..'z' and 'A'..'Z' only */
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	case B_ix:	/* 1-based position of arg1 within arg0, 0 if absent */
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll;
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* case-insensitive search: a comparison is tried
				 * at every position, which is slow */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	case B_ti:	/* strftime(optional format arg0, optional timestamp arg1) */
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		/* the result is limited to MAXVARFMT bytes of g_buf */
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	case B_mt:	/* date string to timestamp, see do_mktime() */
		setvar_i(res, do_mktime(as[0]));
		break;

	case B_ma:	/* regex match of arg1 against arg0, see do_match() */
		res = do_match(an[1], as[0]);
		break;

	case B_ge:	/* substitution with subexpression support; the result string goes to res */
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	case B_gs:	/* replace all matches in arg2 in place; the result is the count */
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	case B_su:	/* replace the first match in arg2 in place; the result is the count */
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tmpvars, 4);
#undef TMPVAR0
#undef TMPVAR1
#undef TMPVAR2
#undef TMPVAR3
#undef TMPVAR

	return res;
#undef tspl
}
2796
2797
2798
2799static int is_assignment(const char *expr)
2800{
2801 char *exprc, *val;
2802
2803 val = (char*)endofname(expr);
2804 if (val == (char*)expr || *val != '=') {
2805 return FALSE;
2806 }
2807
2808 exprc = xstrdup(expr);
2809 val = exprc + (val - expr);
2810 *val++ = '\0';
2811
2812 unescape_string_in_place(val);
2813 setvar_u(newvar(exprc), val);
2814 free(exprc);
2815 return TRUE;
2816}
2817
2818
2819static rstream *next_input_file(void)
2820{
2821#define rsm (G.next_input_file__rsm)
2822#define files_happen (G.next_input_file__files_happen)
2823
2824 const char *fname, *ind;
2825
2826 if (rsm.F)
2827 fclose(rsm.F);
2828 rsm.F = NULL;
2829 rsm.pos = rsm.adv = 0;
2830
2831 for (;;) {
2832 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2833 if (files_happen)
2834 return NULL;
2835 fname = "-";
2836 rsm.F = stdin;
2837 break;
2838 }
2839 ind = getvar_s(incvar(intvar[ARGIND]));
2840 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2841 if (fname && *fname && !is_assignment(fname)) {
2842 rsm.F = xfopen_stdin(fname);
2843 break;
2844 }
2845 }
2846
2847 files_happen = TRUE;
2848 setvar_s(intvar[FILENAME], fname);
2849 return &rsm;
2850#undef rsm
2851#undef files_happen
2852}
2853
2854
2855
2856
2857
2858
2859
2860#define XC(n) ((n) >> 8)
2861
2862static var *evaluate(node *op, var *res)
2863{
2864
2865#define fnargs (G.evaluate__fnargs)
2866
2867#define seed (G.evaluate__seed)
2868#define sreg (G.evaluate__sreg)
2869
2870 var *tmpvars;
2871
2872 if (!op)
2873 return setvar_s(res, NULL);
2874
2875 debug_printf_eval("entered %s()\n", __func__);
2876
2877 tmpvars = nvalloc(2);
2878#define TMPVAR0 (tmpvars)
2879#define TMPVAR1 (tmpvars + 1)
2880
2881 while (op) {
2882 struct {
2883 var *v;
2884 const char *s;
2885 } L = L;
2886 struct {
2887 var *v;
2888 const char *s;
2889 } R = R;
2890 double L_d = L_d;
2891 uint32_t opinfo;
2892 int opn;
2893 node *op1;
2894
2895 opinfo = op->info;
2896 opn = (opinfo & OPNMASK);
2897 g_lineno = op->lineno;
2898 op1 = op->l.n;
2899 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2900
2901
2902 if (opinfo & OF_RES1) {
2903 if ((opinfo & OF_REQUIRED) && !op1)
2904 syntax_error(EMSG_TOO_FEW_ARGS);
2905 L.v = evaluate(op1, TMPVAR0);
2906 if (opinfo & OF_STR1) {
2907 L.s = getvar_s(L.v);
2908 debug_printf_eval("L.s:'%s'\n", L.s);
2909 }
2910 if (opinfo & OF_NUM1) {
2911 L_d = getvar_i(L.v);
2912 debug_printf_eval("L_d:%f\n", L_d);
2913 }
2914 }
2915
2916
2917
2918
2919
2920
2921
2922 if (opinfo & OF_RES2) {
2923 R.v = evaluate(op->r.n, TMPVAR1);
2924
2925
2926 if (opinfo & OF_STR2) {
2927 R.s = getvar_s(R.v);
2928 debug_printf_eval("R.s:'%s'\n", R.s);
2929 }
2930 }
2931
2932 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2933 switch (XC(opinfo & OPCLSMASK)) {
2934
2935
2936
2937
2938 case XC( OC_TEST ):
2939 debug_printf_eval("TEST\n");
2940 if (op1->info == TI_COMMA) {
2941
2942 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2943 op->info |= OF_CHECKED;
2944 if (ptest(op1->r.n))
2945 op->info &= ~OF_CHECKED;
2946 op = op->a.n;
2947 } else {
2948 op = op->r.n;
2949 }
2950 } else {
2951 op = ptest(op1) ? op->a.n : op->r.n;
2952 }
2953 break;
2954
2955
2956 case XC( OC_EXEC ):
2957 debug_printf_eval("EXEC\n");
2958 break;
2959
2960
2961 case XC( OC_BR ):
2962 debug_printf_eval("BR\n");
2963 op = istrue(L.v) ? op->a.n : op->r.n;
2964 break;
2965
2966
2967 case XC( OC_WALKINIT ):
2968 debug_printf_eval("WALKINIT\n");
2969 hashwalk_init(L.v, iamarray(R.v));
2970 break;
2971
2972
2973 case XC( OC_WALKNEXT ):
2974 debug_printf_eval("WALKNEXT\n");
2975 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2976 break;
2977
2978 case XC( OC_PRINT ):
2979 debug_printf_eval("PRINT /\n");
2980 case XC( OC_PRINTF ):
2981 debug_printf_eval("PRINTF\n");
2982 {
2983 FILE *F = stdout;
2984
2985 if (op->r.n) {
2986 rstream *rsm = newfile(R.s);
2987 if (!rsm->F) {
2988 if (opn == '|') {
2989 rsm->F = popen(R.s, "w");
2990 if (rsm->F == NULL)
2991 bb_simple_perror_msg_and_die("popen");
2992 rsm->is_pipe = 1;
2993 } else {
2994 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2995 }
2996 }
2997 F = rsm->F;
2998 }
2999
3000
3001
3002
3003 if ((opinfo & OPCLSMASK) == OC_PRINT) {
3004 if (!op1) {
3005 fputs(getvar_s(intvar[F0]), F);
3006 } else {
3007 for (;;) {
3008 var *v = evaluate(nextarg(&op1), TMPVAR0);
3009 if (v->type & VF_NUMBER) {
3010 fmt_num(getvar_s(intvar[OFMT]),
3011 getvar_i(v));
3012 fputs(g_buf, F);
3013 } else {
3014 fputs(getvar_s(v), F);
3015 }
3016 if (!op1)
3017 break;
3018 fputs(getvar_s(intvar[OFS]), F);
3019 }
3020 }
3021 fputs(getvar_s(intvar[ORS]), F);
3022 } else {
3023 IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
3024 char *s = awk_printf(op1, &len);
3025#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3026 fwrite(s, len, 1, F);
3027#else
3028 fputs(s, F);
3029#endif
3030 free(s);
3031 }
3032 fflush(F);
3033 break;
3034 }
3035
3036 case XC( OC_DELETE ):
3037 debug_printf_eval("DELETE\n");
3038 {
3039
3040
3041
3042 uint32_t info = op1->info & OPCLSMASK;
3043 var *v;
3044
3045 if (info == OC_VAR) {
3046 v = op1->l.v;
3047 } else if (info == OC_FNARG) {
3048 v = &fnargs[op1->l.aidx];
3049 } else {
3050 syntax_error(EMSG_NOT_ARRAY);
3051 }
3052 if (op1->r.n) {
3053 const char *s;
3054 s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3055 hash_remove(iamarray(v), s);
3056 } else {
3057 clear_array(iamarray(v));
3058 }
3059 break;
3060 }
3061
3062 case XC( OC_NEWSOURCE ):
3063 debug_printf_eval("NEWSOURCE\n");
3064 g_progname = op->l.new_progname;
3065 break;
3066
3067 case XC( OC_RETURN ):
3068 debug_printf_eval("RETURN\n");
3069 copyvar(res, L.v);
3070 break;
3071
3072 case XC( OC_NEXTFILE ):
3073 debug_printf_eval("NEXTFILE\n");
3074 nextfile = TRUE;
3075 case XC( OC_NEXT ):
3076 debug_printf_eval("NEXT\n");
3077 nextrec = TRUE;
3078 case XC( OC_DONE ):
3079 debug_printf_eval("DONE\n");
3080 clrvar(res);
3081 break;
3082
3083 case XC( OC_EXIT ):
3084 debug_printf_eval("EXIT\n");
3085 if (op1)
3086 G.exitcode = (int)L_d;
3087 awk_exit();
3088
3089
3090
3091 case XC( OC_VAR ):
3092 debug_printf_eval("VAR\n");
3093 L.v = op->l.v;
3094 if (L.v == intvar[NF])
3095 split_f0();
3096 goto v_cont;
3097
3098 case XC( OC_FNARG ):
3099 debug_printf_eval("FNARG[%d]\n", op->l.aidx);
3100 L.v = &fnargs[op->l.aidx];
3101 v_cont:
3102 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
3103 break;
3104
3105 case XC( OC_IN ):
3106 debug_printf_eval("IN\n");
3107 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
3108 break;
3109
3110 case XC( OC_REGEXP ):
3111 debug_printf_eval("REGEXP\n");
3112 op1 = op;
3113 L.s = getvar_s(intvar[F0]);
3114 goto re_cont;
3115
3116 case XC( OC_MATCH ):
3117 debug_printf_eval("MATCH\n");
3118 op1 = op->r.n;
3119 re_cont:
3120 {
3121 regex_t *re = as_regex(op1, &sreg);
3122 int i = regexec(re, L.s, 0, NULL, 0);
3123 if (re == &sreg)
3124 regfree(re);
3125 setvar_i(res, (i == 0) ^ (opn == '!'));
3126 }
3127 break;
3128
3129 case XC( OC_MOVE ):
3130 debug_printf_eval("MOVE\n");
3131
3132 if (L.v == TMPVAR0)
3133 L.v = res;
3134
3135 if (R.v == TMPVAR1
3136 && !(R.v->type & VF_NUMBER)
3137
3138
3139
3140 ) {
3141 res = setvar_p(L.v, R.v->string);
3142 R.v->string = NULL;
3143 } else {
3144 res = copyvar(L.v, R.v);
3145 }
3146 break;
3147
3148 case XC( OC_TERNARY ):
3149 debug_printf_eval("TERNARY\n");
3150 if (op->r.n->info != TI_COLON)
3151 syntax_error(EMSG_POSSIBLE_ERROR);
3152 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
3153 break;
3154
3155 case XC( OC_FUNC ): {
3156 var *argvars, *sv_fnargs;
3157 const char *sv_progname;
3158 int nargs, i;
3159
3160 debug_printf_eval("FUNC\n");
3161
3162 if (!op->r.f->defined)
3163 syntax_error(EMSG_UNDEF_FUNC);
3164
3165
3166 nargs = op->r.f->nargs;
3167 argvars = nvalloc(nargs);
3168 i = 0;
3169 while (op1) {
3170 var *arg = evaluate(nextarg(&op1), TMPVAR0);
3171 if (i == nargs) {
3172
3173
3174
3175 clrvar(arg);
3176 continue;
3177 }
3178 copyvar(&argvars[i], arg);
3179 argvars[i].type |= VF_CHILD;
3180 argvars[i].x.parent = arg;
3181 i++;
3182 }
3183
3184 sv_fnargs = fnargs;
3185 sv_progname = g_progname;
3186
3187 fnargs = argvars;
3188 res = evaluate(op->r.f->body.first, res);
3189 nvfree(argvars, nargs);
3190
3191 g_progname = sv_progname;
3192 fnargs = sv_fnargs;
3193
3194 break;
3195 }
3196
3197 case XC( OC_GETLINE ):
3198 debug_printf_eval("GETLINE /\n");
3199 case XC( OC_PGETLINE ):
3200 debug_printf_eval("PGETLINE\n");
3201 {
3202 rstream *rsm;
3203 int i;
3204
3205 if (op1) {
3206 rsm = newfile(L.s);
3207 if (!rsm->F) {
3208
3209 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
3210 rsm->F = popen(L.s, "r");
3211 rsm->is_pipe = TRUE;
3212 } else {
3213 rsm->F = fopen_for_read(L.s);
3214 }
3215 }
3216 } else {
3217 if (!iF)
3218 iF = next_input_file();
3219 rsm = iF;
3220 }
3221
3222 if (!rsm || !rsm->F) {
3223 setvar_i(intvar[ERRNO], errno);
3224 setvar_i(res, -1);
3225 break;
3226 }
3227
3228 if (!op->r.n)
3229 R.v = intvar[F0];
3230
3231 i = awk_getline(rsm, R.v);
3232 if (i > 0 && !op1) {
3233 incvar(intvar[FNR]);
3234 incvar(intvar[NR]);
3235 }
3236 setvar_i(res, i);
3237 break;
3238 }
3239
3240
3241 case XC( OC_FBLTIN ): {
3242 double R_d = R_d;
3243 debug_printf_eval("FBLTIN\n");
3244
3245 if (op1 && op1->info == TI_COMMA)
3246
3247 syntax_error("Too many arguments");
3248
3249 switch (opn) {
3250 case F_in:
3251 R_d = (long long)L_d;
3252 break;
3253
3254 case F_rn:
3255 if (op1)
3256 syntax_error("Too many arguments");
3257 {
3258#if RAND_MAX >= 0x7fffffff
3259 uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3260 uint64_t v = ((uint64_t)rand() << 32) | u;
3261
3262# if RAND_MAX > 0x7fffffff
3263 v &= 0x7fffffffffffffffULL;
3264# endif
3265 R_d = (double)v / 0x8000000000000000ULL;
3266#else
3267# error Not implemented for this value of RAND_MAX
3268#endif
3269 break;
3270 }
3271 case F_co:
3272 if (ENABLE_FEATURE_AWK_LIBM) {
3273 R_d = cos(L_d);
3274 break;
3275 }
3276
3277 case F_ex:
3278 if (ENABLE_FEATURE_AWK_LIBM) {
3279 R_d = exp(L_d);
3280 break;
3281 }
3282
3283 case F_lg:
3284 if (ENABLE_FEATURE_AWK_LIBM) {
3285 R_d = log(L_d);
3286 break;
3287 }
3288
3289 case F_si:
3290 if (ENABLE_FEATURE_AWK_LIBM) {
3291 R_d = sin(L_d);
3292 break;
3293 }
3294
3295 case F_sq:
3296 if (ENABLE_FEATURE_AWK_LIBM) {
3297 R_d = sqrt(L_d);
3298 break;
3299 }
3300
3301 syntax_error(EMSG_NO_MATH);
3302 break;
3303
3304 case F_sr:
3305 R_d = (double)seed;
3306 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
3307 srand(seed);
3308 break;
3309
3310 case F_ti:
3311 if (op1)
3312 syntax_error("Too many arguments");
3313 R_d = time(NULL);
3314 break;
3315
3316 case F_le:
3317 debug_printf_eval("length: L.s:'%s'\n", L.s);
3318 if (!op1) {
3319 L.s = getvar_s(intvar[F0]);
3320 debug_printf_eval("length: L.s='%s'\n", L.s);
3321 }
3322 else if (L.v->type & VF_ARRAY) {
3323 R_d = L.v->x.array->nel;
3324 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
3325 break;
3326 }
3327 R_d = strlen(L.s);
3328 break;
3329
3330 case F_sy:
3331 fflush_all();
3332 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
3333 ? (system(L.s) >> 8) : 0;
3334 break;
3335
3336 case F_ff:
3337 if (!op1) {
3338 fflush(stdout);
3339 } else if (L.s && *L.s) {
3340 rstream *rsm = newfile(L.s);
3341 fflush(rsm->F);
3342 } else {
3343 fflush_all();
3344 }
3345 break;
3346
3347 case F_cl: {
3348 rstream *rsm;
3349 int err = 0;
3350 rsm = (rstream *)hash_search(fdhash, L.s);
3351 debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
3352 if (rsm) {
3353 debug_printf_eval("OC_FBLTIN F_cl "
3354 "rsm->is_pipe:%d, ->F:%p\n",
3355 rsm->is_pipe, rsm->F);
3356
3357
3358
3359
3360 if (rsm->F)
3361 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
3362
3363
3364
3365
3366
3367 free(rsm->buffer);
3368 hash_remove(fdhash, L.s);
3369 }
3370 if (err)
3371 setvar_i(intvar[ERRNO], errno);
3372 R_d = (double)err;
3373 break;
3374 }
3375 }
3376 setvar_i(res, R_d);
3377 break;
3378 }
3379
3380 case XC( OC_BUILTIN ):
3381 debug_printf_eval("BUILTIN\n");
3382 res = exec_builtin(op, res);
3383 break;
3384
3385 case XC( OC_SPRINTF ):
3386 debug_printf_eval("SPRINTF\n");
3387 setvar_p(res, awk_printf(op1, NULL));
3388 break;
3389
3390 case XC( OC_UNARY ):
3391 debug_printf_eval("UNARY\n");
3392 {
3393 double Ld, R_d;
3394
3395 Ld = R_d = getvar_i(R.v);
3396 switch (opn) {
3397 case 'P':
3398 Ld = ++R_d;
3399 goto r_op_change;
3400 case 'p':
3401 R_d++;
3402 goto r_op_change;
3403 case 'M':
3404 Ld = --R_d;
3405 goto r_op_change;
3406 case 'm':
3407 R_d--;
3408 r_op_change:
3409 setvar_i(R.v, R_d);
3410 break;
3411 case '!':
3412 Ld = !istrue(R.v);
3413 break;
3414 case '-':
3415 Ld = -R_d;
3416 break;
3417 }
3418 setvar_i(res, Ld);
3419 break;
3420 }
3421
3422 case XC( OC_FIELD ):
3423 debug_printf_eval("FIELD\n");
3424 {
3425 int i = (int)getvar_i(R.v);
3426 if (i < 0)
3427 syntax_error(EMSG_NEGATIVE_FIELD);
3428 if (i == 0) {
3429 res = intvar[F0];
3430 } else {
3431 split_f0();
3432 if (i > nfields)
3433 fsrealloc(i);
3434 res = &Fields[i - 1];
3435 }
3436 break;
3437 }
3438
3439
3440 case XC( OC_CONCAT ):
3441 debug_printf_eval("CONCAT /\n");
3442 case XC( OC_COMMA ): {
3443 const char *sep = "";
3444 debug_printf_eval("COMMA\n");
3445 if (opinfo == TI_COMMA)
3446 sep = getvar_s(intvar[SUBSEP]);
3447 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
3448 break;
3449 }
3450
3451 case XC( OC_LAND ):
3452 debug_printf_eval("LAND\n");
3453 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3454 break;
3455
3456 case XC( OC_LOR ):
3457 debug_printf_eval("LOR\n");
3458 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3459 break;
3460
3461 case XC( OC_BINARY ):
3462 debug_printf_eval("BINARY /\n");
3463 case XC( OC_REPLACE ):
3464 debug_printf_eval("REPLACE\n");
3465 {
3466 double R_d = getvar_i(R.v);
3467 debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
3468 switch (opn) {
3469 case '+':
3470 L_d += R_d;
3471 break;
3472 case '-':
3473 L_d -= R_d;
3474 break;
3475 case '*':
3476 L_d *= R_d;
3477 break;
3478 case '/':
3479 if (R_d == 0)
3480 syntax_error(EMSG_DIV_BY_ZERO);
3481 L_d /= R_d;
3482 break;
3483 case '&':
3484 if (ENABLE_FEATURE_AWK_LIBM)
3485 L_d = pow(L_d, R_d);
3486 else
3487 syntax_error(EMSG_NO_MATH);
3488 break;
3489 case '%':
3490 if (R_d == 0)
3491 syntax_error(EMSG_DIV_BY_ZERO);
3492 L_d -= (long long)(L_d / R_d) * R_d;
3493 break;
3494 }
3495 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3496 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3497 break;
3498 }
3499
3500 case XC( OC_COMPARE ): {
3501 int i = i;
3502 double Ld;
3503 debug_printf_eval("COMPARE\n");
3504
3505 if (is_numeric(L.v) && is_numeric(R.v)) {
3506 Ld = getvar_i(L.v) - getvar_i(R.v);
3507 } else {
3508 const char *l = getvar_s(L.v);
3509 const char *r = getvar_s(R.v);
3510 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3511 }
3512 switch (opn & 0xfe) {
3513 case 0:
3514 i = (Ld > 0);
3515 break;
3516 case 2:
3517 i = (Ld >= 0);
3518 break;
3519 case 4:
3520 i = (Ld == 0);
3521 break;
3522 }
3523 debug_printf_eval("COMPARE result: %d\n", (i == 0) ^ (opn & 1));
3524 setvar_i(res, (i == 0) ^ (opn & 1));
3525 break;
3526 }
3527
3528 default:
3529 syntax_error(EMSG_POSSIBLE_ERROR);
3530 }
3531
3532 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3533 op = op->a.n;
3534 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3535 break;
3536 if (nextrec)
3537 break;
3538 }
3539
3540 nvfree(tmpvars, 2);
3541#undef TMPVAR0
3542#undef TMPVAR1
3543
3544 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3545 return res;
3546#undef fnargs
3547#undef seed
3548#undef sreg
3549}
3550
3551
3552
3553static int awk_exit(void)
3554{
3555 unsigned i;
3556
3557 if (!exiting) {
3558 exiting = TRUE;
3559 nextrec = FALSE;
3560 evaluate(endseq.first, &G.exit__tmpvar);
3561 }
3562
3563
3564 for (i = 0; i < fdhash->csize; i++) {
3565 hash_item *hi;
3566 hi = fdhash->items[i];
3567 while (hi) {
3568 if (hi->data.rs.F && hi->data.rs.is_pipe)
3569 pclose(hi->data.rs.F);
3570 hi = hi->next;
3571 }
3572 }
3573
3574 exit(G.exitcode);
3575}
3576
3577int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3578int awk_main(int argc UNUSED_PARAM, char **argv)
3579{
3580 unsigned opt;
3581 char *opt_F;
3582 llist_t *list_v = NULL;
3583 llist_t *list_f = NULL;
3584#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3585 llist_t *list_e = NULL;
3586#endif
3587 int i;
3588
3589 INIT_G();
3590
3591
3592
3593 if (ENABLE_LOCALE_SUPPORT)
3594 setlocale(LC_NUMERIC, "C");
3595
3596
3597 vhash = hash_init();
3598 {
3599 char *vnames = (char *)vNames;
3600 char *vvalues = (char *)vValues;
3601 for (i = 0; *vnames; i++) {
3602 var *v;
3603 intvar[i] = v = newvar(nextword(&vnames));
3604 if (*vvalues != '\377')
3605 setvar_s(v, nextword(&vvalues));
3606 else
3607 setvar_i(v, 0);
3608
3609 if (*vnames == '*') {
3610 v->type |= VF_SPECIAL;
3611 vnames++;
3612 }
3613 }
3614 }
3615
3616 handle_special(intvar[FS]);
3617 handle_special(intvar[RS]);
3618
3619
3620 if (environ) {
3621 char **envp;
3622 for (envp = environ; *envp; envp++) {
3623
3624 char *s = *envp;
3625 char *s1 = strchr(s, '=');
3626 if (s1) {
3627 *s1 = '\0';
3628
3629
3630 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3631 *s1 = '=';
3632 }
3633 }
3634 }
3635 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3636 argv += optind;
3637
3638 if (opt & OPT_W)
3639 bb_simple_error_msg("warning: option -W is ignored");
3640 if (opt & OPT_F) {
3641 unescape_string_in_place(opt_F);
3642 setvar_s(intvar[FS], opt_F);
3643 }
3644 while (list_v) {
3645 if (!is_assignment(llist_pop(&list_v)))
3646 bb_show_usage();
3647 }
3648
3649
3650 fnhash = hash_init();
3651 ahash = hash_init();
3652 while (list_f) {
3653 int fd;
3654 char *s;
3655
3656 g_progname = llist_pop(&list_f);
3657 fd = xopen_stdin(g_progname);
3658 s = xmalloc_read(fd, NULL);
3659 close(fd);
3660 parse_program(s);
3661 free(s);
3662 }
3663 g_progname = "cmd. line";
3664#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3665 while (list_e) {
3666 parse_program(llist_pop(&list_e));
3667 }
3668#endif
3669
3670
3671 if (!(opt & (OPT_f | OPT_e))) {
3672 if (!*argv)
3673 bb_show_usage();
3674 parse_program(*argv++);
3675 }
3676
3677
3678
3679
3680 free(fnhash->items);
3681 free(fnhash);
3682 fnhash = NULL;
3683
3684
3685
3686
3687
3688 setari_u(intvar[ARGV], 0, "awk");
3689 i = 0;
3690 while (*argv)
3691 setari_u(intvar[ARGV], ++i, *argv++);
3692 setvar_i(intvar[ARGC], i + 1);
3693
3694
3695 newfile("/dev/stdin")->F = stdin;
3696 newfile("/dev/stdout")->F = stdout;
3697 newfile("/dev/stderr")->F = stderr;
3698
3699 evaluate(beginseq.first, &G.main__tmpvar);
3700 if (!mainseq.first && !endseq.first)
3701 awk_exit();
3702
3703
3704 if (!iF)
3705 iF = next_input_file();
3706
3707
3708 while (iF) {
3709 nextfile = FALSE;
3710 setvar_i(intvar[FNR], 0);
3711
3712 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3713 nextrec = FALSE;
3714 incvar(intvar[NR]);
3715 incvar(intvar[FNR]);
3716 evaluate(mainseq.first, &G.main__tmpvar);
3717
3718 if (nextfile)
3719 break;
3720 }
3721
3722 if (i < 0)
3723 syntax_error(strerror(errno));
3724
3725 iF = next_input_file();
3726 }
3727
3728 awk_exit();
3729
3730}
3731