1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include "libbb.h"
18#include "xregex.h"
19#include <math.h>
20
21
22
23
24
25
/* Debug tracing hooks.
 * Both macros are defined here as no-ops, so the fprintf()-based
 * fallbacks guarded by #ifndef below are never selected. Remove the
 * corresponding no-op definition to get that trace on stderr.
 */
#define debug_printf_walker(...) do {} while (0)
#define debug_printf_eval(...) do {} while (0)

#ifndef debug_printf_walker
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#ifndef debug_printf_eval
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
35
36
37
/* Buffer size handed to fmt_num() when getvar_s() formats a number into g_buf */
#define MAXVARFMT 240
/* Minimum number of var slots in an nvblock allocated by nvalloc() */
#define MINNVBLOCK 64

/* Variable type/state flags stored in var.type */
#define VF_NUMBER 0x0001 /* set by setvar_i(): .number holds the value */
#define VF_ARRAY 0x0002 /* set by iamarray(): .x.array is a hash */

#define VF_CACHED 0x0100 /* set by getvar_s()/getvar_i() once the other representation is computed */
#define VF_USER 0x0200 /* set by setvar_u(); cleared by getvar_i() when the string is not entirely numeric */
#define VF_SPECIAL 0x0400 /* NOTE(review): presumably consulted by handle_special(); consumer not visible here */
#define VF_WALK 0x0800 /* .x.walker holds a walker_list chain (freed by nvfree()) */
#define VF_FSTR 0x1000 /* .string is not owned by the var: clrvar() does not free it */
#define VF_CHILD 0x2000 /* .x.parent points to the real variable (followed by iamarray()) */
#define VF_DIRTY 0x4000 /* set by clrvar() on every (re)assignment */

/* Flags that clrvar() preserves across a reassignment */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
55
/* One element of a chain of walker buffers attached to a var via
 * var.x.walker (VF_WALK). nvfree() releases the chain by following .prev.
 * NOTE(review): .cur/.end presumably delimit the unread part of wbuf;
 * the code that fills and reads them is outside this view.
 */
typedef struct walker_list {
	char *end;
	char *cur;
	struct walker_list *prev; /* older walker, or NULL */
	char wbuf[1]; /* start of trailing storage (allocated larger than 1) */
} walker_list;
62
63
/* An awk variable: numeric and/or string value plus VF_* state flags */
typedef struct var_s {
	unsigned type; /* VF_* flags */
	double number; /* numeric value (valid with VF_NUMBER or after getvar_i()) */
	char *string; /* string value or NULL (treated as "" by getvar_s()) */
	union {
		int aidx; /* function argument index (set in parse_program()) */
		struct xhash_s *array; /* array contents when VF_ARRAY is set */
		struct var_s *parent; /* real variable when VF_CHILD is set */
		walker_list *walker; /* walker chain when VF_WALK is set */
	} x;
} var;
75
76
/* A sequence of statement nodes being built by chain_node() */
typedef struct chain_s {
	struct node_s *first; /* first node, NULL while the chain is empty */
	struct node_s *last; /* trailing placeholder node that the next statement will fill */
	const char *programname; /* g_progname seen when the last node was chained */
} chain;
82
83
/* A user-defined function as stored in fnhash */
typedef struct func_s {
	unsigned nargs; /* number of declared parameters (counted in parse_program()) */
	struct chain_s body; /* statements of the function body */
} func;
88
89
/* State of an input/output stream as stored in fdhash.
 * NOTE(review): the code that uses buffer/adv/size/pos is outside this
 * view; the field notes below are inferred from the names - confirm.
 */
typedef struct rstream_s {
	FILE *F; /* underlying stdio stream */
	char *buffer; /* presumably the read buffer */
	int adv; /* presumably offset of unread data in buffer */
	int size; /* presumably allocated size of buffer */
	int pos; /* presumably amount of data in buffer */
	smallint is_pipe; /* presumably nonzero when F is a pipe rather than a file */
} rstream;
98
/* One entry of an xhash bucket chain.
 * .data must stay the first member: hash_search()/hash_find() hand out
 * &item->data and hash_find() treats that address as the item itself.
 */
typedef struct hash_item_s {
	union {
		struct var_s v; /* variable / array element */
		struct rstream_s rs; /* stream (fdhash) */
		struct func_s f; /* function (fnhash) */
	} data;
	struct hash_item_s *next; /* next item in the same bucket */
	char name[1]; /* key; hash_find() allocates room for the full NUL-terminated name */
} hash_item;
108
/* Chained hash table keyed by string */
typedef struct xhash_s {
	unsigned nel; /* number of stored items */
	unsigned csize; /* current number of buckets */
	unsigned nprime; /* index into PRIMES[] of the next bucket count */
	unsigned glen; /* sum of (strlen(name) + 1) over all items */
	struct hash_item_s **items; /* bucket array of csize chain heads */
} xhash;
116
117
/* Node of the parsed program: both expression tree nodes and chained
 * statement nodes use this structure.
 */
typedef struct node_s {
	uint32_t info; /* opcode class (OPCLSMASK), OF_* flags, priority (PRIMASK), low opcode bits */
	unsigned lineno; /* g_lineno at creation time (see new_node()) */
	union {
		struct node_s *n; /* left operand / condition */
		var *v; /* variable or constant (OC_VAR) */
		int aidx; /* function argument index (OC_FNARG) */
		char *new_progname; /* program name (OC_NEWSOURCE) */
		regex_t *re; /* case-sensitive regex (OC_REGEXP) */
	} l;
	union {
		struct node_s *n; /* right operand / branch target */
		regex_t *ire; /* case-insensitive regex (OC_REGEXP) */
		func *f; /* called function (OC_FUNC) */
	} r;
	union {
		struct node_s *n; /* parent while parsing an expression; next statement in a chain */
	} a;
} node;
137
138
/* Block of temporary variables managed stack-like by nvalloc()/nvfree() */
typedef struct nvblock_s {
	int size; /* number of var slots in nv[] */
	var *pos; /* first unused slot */
	struct nvblock_s *prev; /* previous block in the list, or NULL */
	struct nvblock_s *next; /* next block in the list, or NULL */
	var nv[]; /* the slots themselves */
} nvblock;
146
/* A field/record splitter prepared by mk_splitter(): either a regex node
 * (n.info == OC_REGEXP) or a single separator character kept in n.info.
 */
typedef struct tsplitter_s {
	node n;
	regex_t re[2]; /* [0] case-sensitive, [1] REG_ICASE variant */
} tsplitter;
151
152
153
/* Token classes: one bit each. The bit order must match the order of
 * the NTC-separated groups in tokenlist[], because next_token() shifts
 * its class bit left once per group while scanning that table.
 */
#define TC_SEQSTART 1 /* ( */
#define TC_SEQTERM (1 << 1) /* ) */
#define TC_REGEXP (1 << 2) /* /.../ */
#define TC_OUTRDR (1 << 3) /* >> > | (output redirection) */
#define TC_UOPPOST (1 << 4) /* postfix ++ -- */
#define TC_UOPPRE1 (1 << 5) /* prefix ++ -- $ */
#define TC_BINOPX (1 << 6) /* binary operators other than "in", "," and "|" */
#define TC_IN (1 << 7) /* in */
#define TC_COMMA (1 << 8) /* , */
#define TC_PIPE (1 << 9) /* | (input pipe) */
#define TC_UOPPRE2 (1 << 10) /* unary + - ! */
#define TC_ARRTERM (1 << 11) /* ] */
#define TC_GRPSTART (1 << 12) /* { */
#define TC_GRPTERM (1 << 13) /* } */
#define TC_SEMICOL (1 << 14) /* ; */
#define TC_NEWLINE (1 << 15) /* newline */
#define TC_STATX (1 << 16) /* statement keywords other than "while" */
#define TC_WHILE (1 << 17) /* while */
#define TC_ELSE (1 << 18) /* else */
#define TC_BUILTIN (1 << 19) /* builtin function names */
#define TC_GETLINE (1 << 20) /* getline */
#define TC_FUNCDECL (1 << 21) /* func, function */
#define TC_BEGIN (1 << 22) /* BEGIN */
#define TC_END (1 << 23) /* END */
#define TC_EOF (1 << 24) /* end of program text */
#define TC_VARIABLE (1 << 25) /* name */
#define TC_ARRAY (1 << 26) /* name[ */
#define TC_FUNCTION (1 << 27) /* name( */
#define TC_STRING (1 << 28) /* "..." */
#define TC_NUMBER (1 << 29) /* numeric literal */

#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)

/* Combined token classes */
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
	| TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)

/* Word tokens: next_token() always tries these regardless of what is
 * expected, and only accepts them when not followed by [A-Za-z0-9_] */
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
	| TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* A newline directly after one of these tokens is skipped by next_token() */
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
	| TC_BINOP | TC_OPTERM)

/* Tokens that can start an expression */
#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* Tokens that can start a statement or group */
#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* Implicit concatenation: when the previous token is in TC_CONCAT1, the
 * current one is in TC_CONCAT2 and a binary operator is expected,
 * next_token() inserts an OC_CONCAT operator between them */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
	| TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
214
/* Operand-handling flags stored in node.info.
 * NOTE(review): the evaluator that interprets them is outside this view;
 * by their names the *1 flags concern the left operand and the *2 flags
 * the right one (RES = resolve, STR = as string, NUM = as number) - confirm.
 */
#define OF_RES1 0x010000
#define OF_RES2 0x020000
#define OF_STR1 0x040000
#define OF_STR2 0x080000
#define OF_NUM1 0x100000
#define OF_CHECKED 0x200000

/* Two-letter shortcuts for flag combinations: the first letter covers
 * the OF_*1 flags, the second the OF_*2 flags
 * (x = none, V = RES only, S = RES|STR, N = RES|NUM) */
#define xx 0
#define xV OF_RES2
#define xS (OF_RES2 | OF_STR2)
#define Vx OF_RES1
#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx (OF_RES1 | OF_STR1)
#define SV (OF_RES1 | OF_STR1 | OF_RES2)
#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* Extracts the OC_xxx / ST_xxx opcode class from node.info */
#define OPCLSMASK 0xFF00
/* Low 7 bits of node.info: per-class operation selector (character or F_xx / B_xx index) */
#define OPNMASK 0x007F
236
237
238
239
240
/* Operator priority occupies bits 24..30 of node.info.
 * P(x) places priority (or, for builtins, the argument descriptor byte)
 * x into that position. The argument is parenthesized and converted to
 * uint32_t before shifting so that compound arguments expand correctly
 * and values with bit 7 set (e.g. P(0x83)) do not shift into the sign
 * bit of a signed int.
 */
#define P(x) ((uint32_t)(x) << 24)
/* Mask selecting the priority field */
#define PRIMASK 0x7F000000
/* Priority field without its lowest bit (used by parse_expr() when
 * comparing against the priority of an enclosing operator) */
#define PRIMASK2 0x7E000000
244
245
246
/* Opcode-class boundaries. SHIFT_TIL_THIS equals OC_WALKINIT and
 * RECUR_FROM_THIS equals OC_BINARY.
 * NOTE(review): their consumers are outside this view; by name they
 * delimit ranges of the opcode list below - confirm before reordering.
 */
#define SHIFT_TIL_THIS 0x0600
#define RECUR_FROM_THIS 0x1000

/* Opcode classes, stored in node.info under OPCLSMASK */
enum {
	/* statement-level operations */
	OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,

	/* control flow */
	OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,

	/* expression operations */
	OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
	OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
	OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
	OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	/* parse-time only statement markers (dispatched in chain_group()) */
	ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
271
272
/* Selectors for OC_FBLTIN builtins (low bits of tokeninfo entries).
 * Per the tokenlist/tokeninfo pairing: F_in=int, F_rn=rand, F_co=cos,
 * F_ex=exp, F_lg=log, F_si=sin, F_sq=sqrt, F_sr=srand, F_ti=systime,
 * F_le=length, F_sy=system, F_ff=fflush, F_cl=close.
 */
enum {
	F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};
277
278
/* Selectors for OC_BUILTIN builtins (low bits of tokeninfo entries).
 * Per the tokenlist/tokeninfo pairing: B_a2=atan2, B_ix=index,
 * B_ma=match, B_sp=split, B_ss=substr, B_ti=strftime, B_mt=mktime,
 * B_lo=tolower, B_up=toupper, B_ge=gensub, B_gs=gsub, B_su=sub,
 * B_an=and, B_co=compl, B_ls=lshift, B_or=or, B_rs=rshift, B_xo=xor.
 */
enum {
	B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
	B_ge, B_gs, B_su,
	B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
284
285
286
/* NTC ("next token class") separates the groups of tokenlist[] */
#define NTC "\377"
#define NTCC '\377'

#define OC_B OC_BUILTIN

/* Token table scanned by next_token(). Each token is stored as a length
 * byte followed by its text; NTC ends a group and moves on to the next
 * TC_* bit. The table ends at the string's terminating NUL.
 * Entries correspond one-to-one, in order, to tokeninfo[].
 * Within a group, a longer token must precede any token that is its
 * prefix (e.g. ">>" before ">"), since the first match wins.
 */
static const char tokenlist[] ALIGN1 =
	"\1(" NTC /* TC_SEQSTART */
	"\1)" NTC /* TC_SEQTERM */
	"\1/" NTC /* TC_REGEXP */
	"\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
	"\2++" "\2--" NTC /* TC_UOPPOST */
	"\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
	"\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
	"\2*=" "\2/=" "\2%=" "\2^="
	"\1+" "\1-" "\3**=" "\2**"
	"\1/" "\1%" "\1^" "\1*"
	"\2!=" "\2>=" "\2<=" "\1>"
	"\1<" "\2!~" "\1~" "\2&&"
	"\2||" "\1?" "\1:" NTC
	"\2in" NTC /* TC_IN */
	"\1," NTC /* TC_COMMA */
	"\1|" NTC /* TC_PIPE */
	"\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
	"\1]" NTC /* TC_ARRTERM */
	"\1{" NTC /* TC_GRPSTART */
	"\1}" NTC /* TC_GRPTERM */
	"\1;" NTC /* TC_SEMICOL */
	"\1\n" NTC /* TC_NEWLINE */
	"\2if" "\2do" "\3for" "\5break" /* TC_STATX */
	"\10continue" "\6delete" "\5print"
	"\6printf" "\4next" "\10nextfile"
	"\6return" "\4exit" NTC
	"\5while" NTC /* TC_WHILE */
	"\4else" NTC /* TC_ELSE */

	"\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
	"\6rshift" "\3xor"
	"\5close" "\6system" "\6fflush" "\5atan2"
	"\3cos" "\3exp" "\3int" "\3log"
	"\4rand" "\3sin" "\4sqrt" "\5srand"
	"\6gensub" "\4gsub" "\5index" "\6length"
	"\5match" "\5split" "\7sprintf" "\3sub"
	"\6substr" "\7systime" "\10strftime" "\6mktime"
	"\7tolower" "\7toupper" NTC
	"\7getline" NTC /* TC_GETLINE */
	"\4func" "\10function" NTC /* TC_FUNCDECL */
	"\5BEGIN" NTC /* TC_BEGIN */
	"\3END" /* TC_END */

	;
337
/* Per-token info words, parallel to tokenlist[] (one entry per token, in
 * the same order). next_token() copies the matching entry into t_info.
 * An entry combines an opcode class, OF_* operand flags, a P() field
 * and a low selector value.
 */
static const uint32_t tokeninfo[] = {
	0, /* ( */
	0, /* ) */
	OC_REGEXP, /* / */
	xS|'a', xS|'w', xS|'|', /* >> > | */
	OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m', /* postfix ++ -- */
	OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5), /* prefix ++ -- $ */
	/* == = += -= */
	OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
	/* *= /= %= ^= */
	OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
	/* + - **= ** */
	OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
	/* / % ^ * */
	OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
	/* != >= <= > */
	OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
	/* < !~ ~ && */
	OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
	/* || ? : */
	OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
	OC_IN|SV|P(49), /* in */
	OC_COMMA|SS|P(80), /* , */
	OC_PGETLINE|SV|P(37), /* | */
	OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!', /* unary + - ! */
	0, /* ] */
	0, /* { */
	0, /* } */
	0, /* ; */
	0, /* newline */
	ST_IF, ST_DO, ST_FOR, OC_BREAK, /* if do for break */
	OC_CONTINUE, OC_DELETE|Vx, OC_PRINT, /* continue delete print */
	OC_PRINTF, OC_NEXT, OC_NEXTFILE, /* printf next nextfile */
	OC_RETURN|Vx, OC_EXIT|Nx, /* return exit */
	ST_WHILE, /* while */
	0, /* else */

	/* and compl lshift or */
	OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
	/* rshift xor */
	OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
	/* close system fflush atan2 */
	OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
	/* cos exp int log */
	OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
	/* rand sin sqrt srand */
	OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
	/* gensub gsub index length */
	OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
	/* match split sprintf sub */
	OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
	/* substr systime strftime mktime */
	OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
	/* tolower toupper */
	OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
	OC_GETLINE|SV|P(0), /* getline */
	0, 0, /* func function */
	0, /* BEGIN */
	0 /* END */
};
382
383
384
/* Indices into intvar[] for the predefined awk variables.
 * The order matches the names listed in vNames[]; F0 is the "$" entry.
 */
enum {
	CONVFMT, OFMT, FS, OFS,
	ORS, RS, RT, FILENAME,
	SUBSEP, F0, ARGIND, ARGC,
	ARGV, ERRNO, FNR, NR,
	NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
};
392
/* NUL-separated names of the predefined variables, in the order of the
 * enum above; an empty name terminates the list.
 * NOTE(review): a '*' directly after a name's NUL presumably marks that
 * variable as special (VF_SPECIAL); the code consuming this table is
 * outside this view - confirm.
 */
static const char vNames[] ALIGN1 =
	"CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
	"ORS\0" "RS\0*" "RT\0" "FILENAME\0"
	"SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
	"ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
	"NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
399
/* NUL-separated initial string values for the first predefined
 * variables, in vNames[] order (CONVFMT, OFMT, FS, OFS, ORS, RS, RT,
 * FILENAME, SUBSEP, $).
 * NOTE(review): the trailing "\377" presumably tells the initializer
 * that the remaining variables have no string default; the consumer is
 * outside this view - confirm.
 */
static const char vValues[] ALIGN1 =
	"%.6g\0" "%.6g\0" " \0" " \0"
	"\n\0" "\n\0" "\0" "\0"
	"\034\0" "\0" "\377";
404
405
/* Hash table bucket counts: hash_init() starts with FIRST_PRIME buckets
 * and hash_rebuild() steps through PRIMES[] as the table fills up.
 */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
408
409
410
411
412
413
414
/* First part of the global state; accessed through G1 (it is placed
 * immediately before struct globals2, see INIT_G()).
 */
struct globals {
	double t_double; /* value of the last TC_NUMBER token */
	chain beginseq, mainseq, endseq; /* BEGIN, main and END statement chains */
	chain *seq; /* chain currently being appended to */
	node *break_ptr, *continue_ptr; /* jump targets of the innermost loop being parsed */
	rstream *iF; /* NOTE(review): presumably the current input stream */
	xhash *vhash, *ahash, *fdhash, *fnhash; /* variables, function-arg names, streams, functions */
	const char *g_progname; /* program name used in error messages */
	int g_lineno; /* line number used in error messages and new nodes */
	int nfields; /* NOTE(review): presumably number of fields in use */
	int maxfields; /* NOTE(review): presumably allocated size of Fields */
	var *Fields; /* NOTE(review): presumably the $1..$NF variables */
	nvblock *g_cb; /* current block of temporary variables */
	char *g_pos; /* parser position in the program text */
	char *g_buf; /* scratch buffer (number formatting in getvar_s()) */
	smallint icase; /* nonzero: as_regex() selects the case-insensitive regex */
	smallint exiting;
	smallint nextrec;
	smallint nextfile;
	smallint is_f0_split;
};
/* Second part of the global state; accessed through G. Members named
 * func__name hold state that belongs to the function named before the
 * double underscore.
 */
struct globals2 {
	uint32_t t_info; /* info word of the current token */
	uint32_t t_tclass; /* class of the current token */
	char *t_string; /* text of the current string/regexp/name token */
	int t_lineno; /* line number at the tokenizer position */
	int t_rollback; /* nonzero: next_token() re-delivers the current token */

	var *intvar[NUM_INTERNAL_VARS]; /* predefined variables, indexed by CONVFMT..ENVIRON */

	/* former static variables of individual functions */
	char *split_f0__fstrings;

	uint32_t next_token__save_tclass; /* token held back while OC_CONCAT is delivered */
	uint32_t next_token__save_info;
	uint32_t next_token__ltclass; /* class of the previously returned token */
	smallint next_token__concat_inserted;

	smallint next_input_file__files_happen;
	rstream next_input_file__rsm;

	var *evaluate__fnargs;
	unsigned evaluate__seed;
	regex_t evaluate__sreg;

	var ptest__v;

	tsplitter exec_builtin__tspl;

	/* splitters, see mk_splitter() */
	tsplitter fsplitter, rsplitter;
};
/* Global state accessors. ptr_to_globals points at struct globals2 (G);
 * struct globals (G1) sits directly in front of it in the same
 * allocation, hence the [-1] index.
 */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)

/* Short names for the members of G1 and G */
#define t_double (G1.t_double )
#define beginseq (G1.beginseq )
#define mainseq (G1.mainseq )
#define endseq (G1.endseq )
#define seq (G1.seq )
#define break_ptr (G1.break_ptr )
#define continue_ptr (G1.continue_ptr)
#define iF (G1.iF )
#define vhash (G1.vhash )
#define ahash (G1.ahash )
#define fdhash (G1.fdhash )
#define fnhash (G1.fnhash )
#define g_progname (G1.g_progname )
#define g_lineno (G1.g_lineno )
#define nfields (G1.nfields )
#define maxfields (G1.maxfields )
#define Fields (G1.Fields )
#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
#define g_buf (G1.g_buf )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
#define nextfile (G1.nextfile )
#define is_f0_split (G1.is_f0_split )
#define t_info (G.t_info )
#define t_tclass (G.t_tclass )
#define t_string (G.t_string )
#define t_lineno (G.t_lineno )
#define t_rollback (G.t_rollback )
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
/* Allocate both global structures zero-filled in one block, point
 * ptr_to_globals just past G1, and set the two non-zero defaults */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
	G.next_token__ltclass = TC_OPTERM; \
	G.evaluate__seed = 1; \
} while (0)
512
513
514
/* Forward declarations of functions used before their definition */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
522
523
524
/* Error message texts (passed to syntax_error() and similar reporters) */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
535
536static void zero_out_var(var *vp)
537{
538 memset(vp, 0, sizeof(*vp));
539}
540
541static void syntax_error(const char *message) NORETURN;
542static void syntax_error(const char *message)
543{
544 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
545}
546
547
548
/* Compute the hash of a NUL-terminated key: h = h * 63 + c for every
 * character, in unsigned (wrap-around) arithmetic. The empty string
 * hashes to 0. The caller reduces the result modulo the bucket count.
 */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63 + *p;
	return h;
}
557
558
559static xhash *hash_init(void)
560{
561 xhash *newhash;
562
563 newhash = xzalloc(sizeof(*newhash));
564 newhash->csize = FIRST_PRIME;
565 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
566
567 return newhash;
568}
569
570
571static void *hash_search(xhash *hash, const char *name)
572{
573 hash_item *hi;
574
575 hi = hash->items[hashidx(name) % hash->csize];
576 while (hi) {
577 if (strcmp(hi->name, name) == 0)
578 return &hi->data;
579 hi = hi->next;
580 }
581 return NULL;
582}
583
584
585static void hash_rebuild(xhash *hash)
586{
587 unsigned newsize, i, idx;
588 hash_item **newitems, *hi, *thi;
589
590 if (hash->nprime == ARRAY_SIZE(PRIMES))
591 return;
592
593 newsize = PRIMES[hash->nprime++];
594 newitems = xzalloc(newsize * sizeof(newitems[0]));
595
596 for (i = 0; i < hash->csize; i++) {
597 hi = hash->items[i];
598 while (hi) {
599 thi = hi;
600 hi = thi->next;
601 idx = hashidx(thi->name) % newsize;
602 thi->next = newitems[idx];
603 newitems[idx] = thi;
604 }
605 }
606
607 free(hash->items);
608 hash->csize = newsize;
609 hash->items = newitems;
610}
611
612
613static void *hash_find(xhash *hash, const char *name)
614{
615 hash_item *hi;
616 unsigned idx;
617 int l;
618
619 hi = hash_search(hash, name);
620 if (!hi) {
621 if (++hash->nel / hash->csize > 10)
622 hash_rebuild(hash);
623
624 l = strlen(name) + 1;
625 hi = xzalloc(sizeof(*hi) + l);
626 strcpy(hi->name, name);
627
628 idx = hashidx(name) % hash->csize;
629 hi->next = hash->items[idx];
630 hash->items[idx] = hi;
631 hash->glen += l;
632 }
633 return &hi->data;
634}
635
/* Typed find-or-create wrappers around hash_find():
 * findvar - variable in an explicit hash; newvar - variable in vhash;
 * newfile - stream in fdhash; newfunc - function in fnhash.
 */
#define findvar(hash, name) ((var*) hash_find((hash), (name)))
#define newvar(name) ((var*) hash_find(vhash, (name)))
#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name) ((func*) hash_find(fnhash, (name)))
640
641static void hash_remove(xhash *hash, const char *name)
642{
643 hash_item *hi, **phi;
644
645 phi = &hash->items[hashidx(name) % hash->csize];
646 while (*phi) {
647 hi = *phi;
648 if (strcmp(hi->name, name) == 0) {
649 hash->glen -= (strlen(name) + 1);
650 hash->nel--;
651 *phi = hi->next;
652 free(hi);
653 break;
654 }
655 phi = &hi->next;
656 }
657}
658
659
660
661static char *skip_spaces(char *p)
662{
663 while (1) {
664 if (*p == '\\' && p[1] == '\n') {
665 p++;
666 t_lineno++;
667 } else if (*p != ' ' && *p != '\t') {
668 break;
669 }
670 p++;
671 }
672 return p;
673}
674
675
/* Return the NUL-terminated word at *S and advance *S to the byte
 * following that word's terminator.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
683
/* Fetch one (possibly backslash-escaped) character from *S and advance
 * *S past it. A backslash whose escape is not consumed by
 * bb_process_escape_sequence() yields the character after the
 * backslash; a terminating NUL is returned but never stepped over.
 */
static char nextchar(char **s)
{
	char *after_first;
	char ch;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after_first = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after_first) {
		/* escape was not consumed: take the next char literally */
		ch = **s;
		if (ch != '\0')
			(*s)++;
	}
	return ch;
}
699
700static ALWAYS_INLINE int isalnum_(int c)
701{
702 return (isalnum(c) || c == '_');
703}
704
705static double my_strtod(char **pp)
706{
707 char *cp = *pp;
708 if (ENABLE_DESKTOP && cp[0] == '0') {
709
710 char c = (cp[1] | 0x20);
711 if (c == 'x' || isdigit(cp[1])) {
712 unsigned long long ull = strtoull(cp, pp, 0);
713 if (c == 'x')
714 return ull;
715 c = **pp;
716 if (!isdigit(c) && c != '.')
717 return ull;
718
719
720
721
722
723 }
724 }
725 return strtod(cp, pp);
726}
727
728
729
730static xhash *iamarray(var *v)
731{
732 var *a = v;
733
734 while (a->type & VF_CHILD)
735 a = a->x.parent;
736
737 if (!(a->type & VF_ARRAY)) {
738 a->type |= VF_ARRAY;
739 a->x.array = hash_init();
740 }
741 return a->x.array;
742}
743
744static void clear_array(xhash *array)
745{
746 unsigned i;
747 hash_item *hi, *thi;
748
749 for (i = 0; i < array->csize; i++) {
750 hi = array->items[i];
751 while (hi) {
752 thi = hi;
753 hi = hi->next;
754 free(thi->data.v.string);
755 free(thi);
756 }
757 array->items[i] = NULL;
758 }
759 array->glen = array->nel = 0;
760}
761
762
763static var *clrvar(var *v)
764{
765 if (!(v->type & VF_FSTR))
766 free(v->string);
767
768 v->type &= VF_DONTTOUCH;
769 v->type |= VF_DIRTY;
770 v->string = NULL;
771 return v;
772}
773
774
775static var *setvar_p(var *v, char *value)
776{
777 clrvar(v);
778 v->string = value;
779 handle_special(v);
780 return v;
781}
782
783
784static var *setvar_s(var *v, const char *value)
785{
786 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
787}
788
789
790static var *setvar_u(var *v, const char *value)
791{
792 v = setvar_s(v, value);
793 v->type |= VF_USER;
794 return v;
795}
796
797
798static void setari_u(var *a, int idx, const char *s)
799{
800 var *v;
801
802 v = findvar(iamarray(a), itoa(idx));
803 setvar_u(v, s);
804}
805
806
807static var *setvar_i(var *v, double value)
808{
809 clrvar(v);
810 v->type |= VF_NUMBER;
811 v->number = value;
812 handle_special(v);
813 return v;
814}
815
816static const char *getvar_s(var *v)
817{
818
819 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
820 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
821 v->string = xstrdup(g_buf);
822 v->type |= VF_CACHED;
823 }
824 return (v->string == NULL) ? "" : v->string;
825}
826
827static double getvar_i(var *v)
828{
829 char *s;
830
831 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
832 v->number = 0;
833 s = v->string;
834 if (s && *s) {
835 debug_printf_eval("getvar_i: '%s'->", s);
836 v->number = my_strtod(&s);
837 debug_printf_eval("%f (s:'%s')\n", v->number, s);
838 if (v->type & VF_USER) {
839 s = skip_spaces(s);
840 if (*s != '\0')
841 v->type &= ~VF_USER;
842 }
843 } else {
844 debug_printf_eval("getvar_i: '%s'->zero\n", s);
845 v->type &= ~VF_USER;
846 }
847 v->type |= VF_CACHED;
848 }
849 debug_printf_eval("getvar_i: %f\n", v->number);
850 return v->number;
851}
852
853
854static unsigned long getvar_i_int(var *v)
855{
856 double d = getvar_i(v);
857
858
859
860 if (d >= 0)
861 return (unsigned long)d;
862
863 return - (long) (unsigned long) (-d);
864}
865
866static var *copyvar(var *dest, const var *src)
867{
868 if (dest != src) {
869 clrvar(dest);
870 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
871 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
872 dest->number = src->number;
873 if (src->string)
874 dest->string = xstrdup(src->string);
875 }
876 handle_special(dest);
877 return dest;
878}
879
880static var *incvar(var *v)
881{
882 return setvar_i(v, getvar_i(v) + 1.0);
883}
884
885
886static int is_numeric(var *v)
887{
888 getvar_i(v);
889 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
890}
891
892
893static int istrue(var *v)
894{
895 if (is_numeric(v))
896 return (v->number != 0);
897 return (v->string && v->string[0]);
898}
899
900
901static var *nvalloc(int n)
902{
903 nvblock *pb = NULL;
904 var *v, *r;
905 int size;
906
907 while (g_cb) {
908 pb = g_cb;
909 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
910 break;
911 g_cb = g_cb->next;
912 }
913
914 if (!g_cb) {
915 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
916 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
917 g_cb->size = size;
918 g_cb->pos = g_cb->nv;
919 g_cb->prev = pb;
920
921 if (pb)
922 pb->next = g_cb;
923 }
924
925 v = r = g_cb->pos;
926 g_cb->pos += n;
927
928 while (v < g_cb->pos) {
929 v->type = 0;
930 v->string = NULL;
931 v++;
932 }
933
934 return r;
935}
936
937static void nvfree(var *v)
938{
939 var *p;
940
941 if (v < g_cb->nv || v >= g_cb->pos)
942 syntax_error(EMSG_INTERNAL_ERROR);
943
944 for (p = v; p < g_cb->pos; p++) {
945 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
946 clear_array(iamarray(p));
947 free(p->x.array->items);
948 free(p->x.array);
949 }
950 if (p->type & VF_WALK) {
951 walker_list *n;
952 walker_list *w = p->x.walker;
953 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
954 p->x.walker = NULL;
955 while (w) {
956 n = w->prev;
957 debug_printf_walker(" free(%p)\n", w);
958 free(w);
959 w = n;
960 }
961 }
962 clrvar(p);
963 }
964
965 g_cb->pos = v;
966 while (g_cb->prev && g_cb->pos == g_cb->nv) {
967 g_cb = g_cb->prev;
968 }
969}
970
971
972
973
974
975
976static uint32_t next_token(uint32_t expected)
977{
978#define concat_inserted (G.next_token__concat_inserted)
979#define save_tclass (G.next_token__save_tclass)
980#define save_info (G.next_token__save_info)
981
982#define ltclass (G.next_token__ltclass)
983
984 char *p, *s;
985 const char *tl;
986 uint32_t tc;
987 const uint32_t *ti;
988
989 if (t_rollback) {
990 t_rollback = FALSE;
991
992 } else if (concat_inserted) {
993 concat_inserted = FALSE;
994 t_tclass = save_tclass;
995 t_info = save_info;
996
997 } else {
998 p = g_pos;
999 readnext:
1000 p = skip_spaces(p);
1001 g_lineno = t_lineno;
1002 if (*p == '#')
1003 while (*p != '\n' && *p != '\0')
1004 p++;
1005
1006 if (*p == '\n')
1007 t_lineno++;
1008
1009 if (*p == '\0') {
1010 tc = TC_EOF;
1011
1012 } else if (*p == '\"') {
1013
1014 t_string = s = ++p;
1015 while (*p != '\"') {
1016 char *pp;
1017 if (*p == '\0' || *p == '\n')
1018 syntax_error(EMSG_UNEXP_EOS);
1019 pp = p;
1020 *s++ = nextchar(&pp);
1021 p = pp;
1022 }
1023 p++;
1024 *s = '\0';
1025 tc = TC_STRING;
1026
1027 } else if ((expected & TC_REGEXP) && *p == '/') {
1028
1029 t_string = s = ++p;
1030 while (*p != '/') {
1031 if (*p == '\0' || *p == '\n')
1032 syntax_error(EMSG_UNEXP_EOS);
1033 *s = *p++;
1034 if (*s++ == '\\') {
1035 char *pp = p;
1036 s[-1] = bb_process_escape_sequence((const char **)&pp);
1037 if (*p == '\\')
1038 *s++ = '\\';
1039 if (pp == p)
1040 *s++ = *p++;
1041 else
1042 p = pp;
1043 }
1044 }
1045 p++;
1046 *s = '\0';
1047 tc = TC_REGEXP;
1048
1049 } else if (*p == '.' || isdigit(*p)) {
1050
1051 char *pp = p;
1052 t_double = my_strtod(&pp);
1053 p = pp;
1054 if (*p == '.')
1055 syntax_error(EMSG_UNEXP_TOKEN);
1056 tc = TC_NUMBER;
1057
1058 } else {
1059
1060 tl = tokenlist;
1061 tc = 0x00000001;
1062 ti = tokeninfo;
1063 while (*tl) {
1064 int l = (unsigned char) *tl++;
1065 if (l == (unsigned char) NTCC) {
1066 tc <<= 1;
1067 continue;
1068 }
1069
1070
1071
1072
1073 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1074 && strncmp(p, tl, l) == 0
1075 && !((tc & TC_WORD) && isalnum_(p[l]))
1076 ) {
1077
1078 t_info = *ti;
1079 p += l;
1080 goto token_found;
1081 }
1082 ti++;
1083 tl += l;
1084 }
1085
1086
1087
1088 if (!isalnum_(*p))
1089 syntax_error(EMSG_UNEXP_TOKEN);
1090
1091 t_string = --p;
1092 while (isalnum_(*++p)) {
1093 p[-1] = *p;
1094 }
1095 p[-1] = '\0';
1096 tc = TC_VARIABLE;
1097
1098 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1099 p = skip_spaces(p);
1100 if (*p == '(') {
1101 tc = TC_FUNCTION;
1102 } else {
1103 if (*p == '[') {
1104 p++;
1105 tc = TC_ARRAY;
1106 }
1107 }
1108 token_found: ;
1109 }
1110 g_pos = p;
1111
1112
1113 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1114 goto readnext;
1115
1116
1117 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1118 concat_inserted = TRUE;
1119 save_tclass = tc;
1120 save_info = t_info;
1121 tc = TC_BINOP;
1122 t_info = OC_CONCAT | SS | P(35);
1123 }
1124
1125 t_tclass = tc;
1126 }
1127 ltclass = t_tclass;
1128
1129
1130 if (!(ltclass & expected))
1131 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1132 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1133
1134 return ltclass;
1135#undef concat_inserted
1136#undef save_tclass
1137#undef save_info
1138#undef ltclass
1139}
1140
1141static void rollback_token(void)
1142{
1143 t_rollback = TRUE;
1144}
1145
1146static node *new_node(uint32_t info)
1147{
1148 node *n;
1149
1150 n = xzalloc(sizeof(node));
1151 n->info = info;
1152 n->lineno = g_lineno;
1153 return n;
1154}
1155
1156static void mk_re_node(const char *s, node *n, regex_t *re)
1157{
1158 n->info = OC_REGEXP;
1159 n->l.re = re;
1160 n->r.ire = re + 1;
1161 xregcomp(re, s, REG_EXTENDED);
1162 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1163}
1164
1165static node *condition(void)
1166{
1167 next_token(TC_SEQSTART);
1168 return parse_expr(TC_SEQTERM);
1169}
1170
1171
1172
/* Parse an expression, reading tokens until one of class IEXP arrives
 * (that terminating token is consumed and remains the current token).
 * Returns the root of the expression tree, or NULL for an empty
 * expression.
 *
 * While parsing, node.a.n of every node points to its parent so that a
 * new operator can climb up the tree to the place where its priority
 * fits.
 */
static node *parse_expr(uint32_t iexp)
{
	node sn;
	node *cn = &sn;
	node *vn, *glptr;
	uint32_t tc, xtc;
	var *v;

	/* sn is a dummy root carrying the largest priority value; the
	 * finished tree hangs off sn.r.n */
	sn.info = PRIMASK;
	sn.r.n = glptr = NULL;
	/* xtc: token classes acceptable as the next token */
	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

	while (!((tc = next_token(xtc)) & iexp)) {

		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
			/* "<" (its tokeninfo entry) directly after getline is an
			 * input redirection, not a comparison: its operand
			 * becomes the left child of the getline node */
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			xtc = TC_OPERAND | TC_UOPPRE;
			glptr = NULL;

		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
			/* binary or postfix operator: climb towards the root
			 * while the new operator's priority value is greater
			 * than the parent's (compared with the lowest priority
			 * bit of the parent masked off), or while passing
			 * identical ':' operators */
			vn = cn;
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			    || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
			) {
				vn = vn->a.n;
			}
			if ((t_info & OPCLSMASK) == OC_TERNARY)
				t_info += P(6);
			/* splice the new node between vn and vn's parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* "cmd | getline": the getline keyword must follow */
					next_token(TC_GETLINE);
					/* give the node priority 0, as for a plain getline */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				/* postfix operator: vn is its (right) operand */
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			/* operand or prefix operator: attach as the right child
			 * of the current node */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* one should be very careful with switch on tclass -
				 * only simple tclasses should be used! */
				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					cn->info = OC_VAR;
					/* a name found in ahash is a function argument */
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.aidx = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* subscript expression up to "]" */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					/* constant: stored in a freshly allocated var */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					/* user function call: arguments parsed as "( ... )" */
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					/* parenthesized subexpression replaces the new node */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					/* remember the node: a following "<" is a redirection */
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					cn->l.n = condition();
					break;
				}
			}
		}
	}
	return sn.r.n;
}
1290
1291
1292static node *chain_node(uint32_t info)
1293{
1294 node *n;
1295
1296 if (!seq->first)
1297 seq->first = seq->last = new_node(0);
1298
1299 if (seq->programname != g_progname) {
1300 seq->programname = g_progname;
1301 n = chain_node(OC_NEWSOURCE);
1302 n->l.new_progname = xstrdup(g_progname);
1303 }
1304
1305 n = seq->last;
1306 n->info = info;
1307 seq->last = n->a.n = new_node(OC_DONE);
1308
1309 return n;
1310}
1311
1312static void chain_expr(uint32_t info)
1313{
1314 node *n;
1315
1316 n = chain_node(info);
1317 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1318 if (t_tclass & TC_GRPTERM)
1319 rollback_token();
1320}
1321
1322static node *chain_loop(node *nn)
1323{
1324 node *n, *n2, *save_brk, *save_cont;
1325
1326 save_brk = break_ptr;
1327 save_cont = continue_ptr;
1328
1329 n = chain_node(OC_BR | Vx);
1330 continue_ptr = new_node(OC_EXEC);
1331 break_ptr = new_node(OC_EXEC);
1332 chain_group();
1333 n2 = chain_node(OC_EXEC | Vx);
1334 n2->l.n = nn;
1335 n2->a.n = n;
1336 continue_ptr->a.n = n2;
1337 break_ptr->a.n = n->r.n = seq->last;
1338
1339 continue_ptr = save_cont;
1340 break_ptr = save_brk;
1341
1342 return n;
1343}
1344
1345
1346static void chain_group(void)
1347{
1348 uint32_t c;
1349 node *n, *n2, *n3;
1350
1351 do {
1352 c = next_token(TC_GRPSEQ);
1353 } while (c & TC_NEWLINE);
1354
1355 if (c & TC_GRPSTART) {
1356 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1357 if (t_tclass & TC_NEWLINE)
1358 continue;
1359 rollback_token();
1360 chain_group();
1361 }
1362 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1363 rollback_token();
1364 chain_expr(OC_EXEC | Vx);
1365 } else {
1366 switch (t_info & OPCLSMASK) {
1367 case ST_IF:
1368 n = chain_node(OC_BR | Vx);
1369 n->l.n = condition();
1370 chain_group();
1371 n2 = chain_node(OC_EXEC);
1372 n->r.n = seq->last;
1373 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1374 chain_group();
1375 n2->a.n = seq->last;
1376 } else {
1377 rollback_token();
1378 }
1379 break;
1380
1381 case ST_WHILE:
1382 n2 = condition();
1383 n = chain_loop(NULL);
1384 n->l.n = n2;
1385 break;
1386
1387 case ST_DO:
1388 n2 = chain_node(OC_EXEC);
1389 n = chain_loop(NULL);
1390 n2->a.n = n->a.n;
1391 next_token(TC_WHILE);
1392 n->l.n = condition();
1393 break;
1394
1395 case ST_FOR:
1396 next_token(TC_SEQSTART);
1397 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1398 if (t_tclass & TC_SEQTERM) {
1399 if ((n2->info & OPCLSMASK) != OC_IN)
1400 syntax_error(EMSG_UNEXP_TOKEN);
1401 n = chain_node(OC_WALKINIT | VV);
1402 n->l.n = n2->l.n;
1403 n->r.n = n2->r.n;
1404 n = chain_loop(NULL);
1405 n->info = OC_WALKNEXT | Vx;
1406 n->l.n = n2->l.n;
1407 } else {
1408 n = chain_node(OC_EXEC | Vx);
1409 n->l.n = n2;
1410 n2 = parse_expr(TC_SEMICOL);
1411 n3 = parse_expr(TC_SEQTERM);
1412 n = chain_loop(n3);
1413 n->l.n = n2;
1414 if (!n2)
1415 n->info = OC_EXEC;
1416 }
1417 break;
1418
1419 case OC_PRINT:
1420 case OC_PRINTF:
1421 n = chain_node(t_info);
1422 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1423 if (t_tclass & TC_OUTRDR) {
1424 n->info |= t_info;
1425 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1426 }
1427 if (t_tclass & TC_GRPTERM)
1428 rollback_token();
1429 break;
1430
1431 case OC_BREAK:
1432 n = chain_node(OC_EXEC);
1433 n->a.n = break_ptr;
1434 break;
1435
1436 case OC_CONTINUE:
1437 n = chain_node(OC_EXEC);
1438 n->a.n = continue_ptr;
1439 break;
1440
1441
1442 default:
1443 chain_expr(t_info);
1444 }
1445 }
1446}
1447
1448static void parse_program(char *p)
1449{
1450 uint32_t tclass;
1451 node *cn;
1452 func *f;
1453 var *v;
1454
1455 g_pos = p;
1456 t_lineno = 1;
1457 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1458 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1459
1460 if (tclass & TC_OPTERM)
1461 continue;
1462
1463 seq = &mainseq;
1464 if (tclass & TC_BEGIN) {
1465 seq = &beginseq;
1466 chain_group();
1467
1468 } else if (tclass & TC_END) {
1469 seq = &endseq;
1470 chain_group();
1471
1472 } else if (tclass & TC_FUNCDECL) {
1473 next_token(TC_FUNCTION);
1474 g_pos++;
1475 f = newfunc(t_string);
1476 f->body.first = NULL;
1477 f->nargs = 0;
1478 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1479 v = findvar(ahash, t_string);
1480 v->x.aidx = f->nargs++;
1481
1482 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1483 break;
1484 }
1485 seq = &f->body;
1486 chain_group();
1487 clear_array(ahash);
1488
1489 } else if (tclass & TC_OPSEQ) {
1490 rollback_token();
1491 cn = chain_node(OC_TEST);
1492 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1493 if (t_tclass & TC_GRPSTART) {
1494 rollback_token();
1495 chain_group();
1496 } else {
1497 chain_node(OC_PRINT);
1498 }
1499 cn->r.n = mainseq.last;
1500
1501 } else {
1502 rollback_token();
1503 chain_group();
1504 }
1505 }
1506}
1507
1508
1509
1510
1511static node *mk_splitter(const char *s, tsplitter *spl)
1512{
1513 regex_t *re, *ire;
1514 node *n;
1515
1516 re = &spl->re[0];
1517 ire = &spl->re[1];
1518 n = &spl->n;
1519 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1520 regfree(re);
1521 regfree(ire);
1522 }
1523 if (s[0] && s[1]) {
1524 mk_re_node(s, n, re);
1525 } else {
1526 n->info = (uint32_t) s[0];
1527 }
1528
1529 return n;
1530}
1531
1532
1533
1534
1535
1536static regex_t *as_regex(node *op, regex_t *preg)
1537{
1538 int cflags;
1539 var *v;
1540 const char *s;
1541
1542 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1543 return icase ? op->r.ire : op->l.re;
1544 }
1545 v = nvalloc(1);
1546 s = getvar_s(evaluate(op, v));
1547
1548 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1549
1550
1551
1552
1553
1554 if (regcomp(preg, s, cflags)) {
1555 cflags &= ~REG_EXTENDED;
1556 xregcomp(preg, s, cflags);
1557 }
1558 nvfree(v);
1559 return preg;
1560}
1561
1562
1563
1564
1565
/*
 * Make sure buffer b can be indexed at least up to b[n].
 *
 * b    - current buffer, or NULL if nothing is allocated yet
 * n    - highest index the caller is about to use
 * size - in/out: current capacity of b (only read when b is non-NULL)
 *
 * The buffer is reallocated to n + n/2 + 80 bytes when it is missing or
 * when n >= *size; otherwise it is returned unchanged.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1574
1575
1576static void fsrealloc(int size)
1577{
1578 int i;
1579
1580 if (size >= maxfields) {
1581 i = maxfields;
1582 maxfields = size + 16;
1583 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1584 for (; i < maxfields; i++) {
1585 Fields[i].type = VF_SPECIAL;
1586 Fields[i].string = NULL;
1587 }
1588 }
1589
1590 for (i = size; i < nfields; i++) {
1591 clrvar(Fields + i);
1592 }
1593 nfields = size;
1594}
1595
/*
 * Split string s into words according to the splitter node spl.
 *
 * A zero-filled buffer of strlen(s)*2+3 bytes is allocated and stored in
 * *slist; the words are written into it one after another, each terminated
 * by NUL.  Callers in this file free that buffer themselves.
 * Returns the number of words.
 *
 * Four modes, chosen from spl->info:
 *   - regexp node:          split at regex matches
 *   - character '\0':       every character becomes its own word
 *   - character other than ' ': split at that single character
 *   - character ' ':        split at runs of whitespace
 */
1596 static int awk_split(const char *s, node *spl, char **slist)
1597 {
1598 int l, n;
1599 char c[4];
1600 char *s1;
1601 regmatch_t pmatch[2];
1602
1603
1604 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1605 strcpy(s1, s);
1606
	/* c[0..1]: separator character (two slots, for the upper/lower-case
	 * pair used with icase); c[2]: '\n' as an extra separator when RS is
	 * the empty string; c[3]: terminator */
1607 c[0] = c[1] = (char)spl->info;
1608 c[2] = c[3] = '\0';
1609 if (*getvar_s(intvar[RS]) == '\0')
1610 c[2] = '\n';
1611
1612 n = 0;
1613 if ((spl->info & OPCLSMASK) == OC_REGEXP) {
	/* regex split; an empty input yields zero words */
1614 if (!*s)
1615 return n;
1616 n++;
1617 do {
	/* l = distance to the next newline separator (c+2 is "\n" or ""),
	 * i.e. to the end of the string when RS is not empty */
1618 l = strcspn(s, c+2);
1619 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1620 && pmatch[0].rm_so <= l
1621 ) {
	/* separator found before the newline: the word ends at the match */
1622 l = pmatch[0].rm_so;
	/* empty match at offset 0: consume one character to make progress */
1623 if (pmatch[0].rm_eo == 0) {
1624 l++;
1625 pmatch[0].rm_eo++;
1626 }
1627 n++;
1628 } else {
	/* no usable match: the word runs to l; skip the newline if present */
1629 pmatch[0].rm_eo = l;
1630 if (s[l])
1631 pmatch[0].rm_eo++;
1632 }
1633 memcpy(s1, s, l);
1634
	/* overwrite the separator span in the output with NUL bytes */
1635 do {
1636 s1[l] = '\0';
1637 } while (++l < pmatch[0].rm_eo);
	/* NOTE(review): nextword() is defined elsewhere; presumably it moves
	 * s1 past the word just stored -- confirm */
1638 nextword(&s1);
1639 s += pmatch[0].rm_eo;
1640 } while (*s);
1641 return n;
1642 }
1643 if (c[0] == '\0') {
	/* empty separator: one word per input character */
1644 while (*s) {
1645 *s1++ = *s++;
1646 *s1++ = '\0';
1647 n++;
1648 }
1649 return n;
1650 }
1651 if (c[0] != ' ') {
	/* single-character separator: cut the copy made above in place */
1652 if (icase) {
1653 c[0] = toupper(c[0]);
1654 c[1] = tolower(c[1]);
1655 }
1656 if (*s1)
1657 n++;
1658 while ((s1 = strpbrk(s1, c)) != NULL) {
1659 *s1++ = '\0';
1660 n++;
1661 }
1662 return n;
1663 }
1664
	/* separator is a space: words are maximal runs of non-whitespace */
1665 while (*s) {
1666 s = skip_whitespace(s);
1667 if (!*s)
1668 break;
1669 n++;
1670 while (*s && !isspace(*s))
1671 *s1++ = *s++;
1672 *s1++ = '\0';
1673 }
1674 return n;
1675 }
1676
1677static void split_f0(void)
1678{
1679
1680#define fstrings (G.split_f0__fstrings)
1681
1682 int i, n;
1683 char *s;
1684
1685 if (is_f0_split)
1686 return;
1687
1688 is_f0_split = TRUE;
1689 free(fstrings);
1690 fsrealloc(0);
1691 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1692 fsrealloc(n);
1693 s = fstrings;
1694 for (i = 0; i < n; i++) {
1695 Fields[i].string = nextword(&s);
1696 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1697 }
1698
1699
1700 clrvar(intvar[NF]);
1701 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1702 intvar[NF]->number = nfields;
1703#undef fstrings
1704}
1705
1706
1707static void handle_special(var *v)
1708{
1709 int n;
1710 char *b;
1711 const char *sep, *s;
1712 int sl, l, len, i, bsize;
1713
1714 if (!(v->type & VF_SPECIAL))
1715 return;
1716
1717 if (v == intvar[NF]) {
1718 n = (int)getvar_i(v);
1719 fsrealloc(n);
1720
1721
1722 sep = getvar_s(intvar[OFS]);
1723 sl = strlen(sep);
1724 b = NULL;
1725 len = 0;
1726 for (i = 0; i < n; i++) {
1727 s = getvar_s(&Fields[i]);
1728 l = strlen(s);
1729 if (b) {
1730 memcpy(b+len, sep, sl);
1731 len += sl;
1732 }
1733 b = qrealloc(b, len+l+sl, &bsize);
1734 memcpy(b+len, s, l);
1735 len += l;
1736 }
1737 if (b)
1738 b[len] = '\0';
1739 setvar_p(intvar[F0], b);
1740 is_f0_split = TRUE;
1741
1742 } else if (v == intvar[F0]) {
1743 is_f0_split = FALSE;
1744
1745 } else if (v == intvar[FS]) {
1746 mk_splitter(getvar_s(v), &fsplitter);
1747
1748 } else if (v == intvar[RS]) {
1749 mk_splitter(getvar_s(v), &rsplitter);
1750
1751 } else if (v == intvar[IGNORECASE]) {
1752 icase = istrue(v);
1753
1754 } else {
1755 n = getvar_i(intvar[NF]);
1756 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1757
1758 }
1759}
1760
1761
1762static node *nextarg(node **pn)
1763{
1764 node *n;
1765
1766 n = *pn;
1767 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1768 *pn = n->r.n;
1769 n = n->l.n;
1770 } else {
1771 *pn = NULL;
1772 }
1773 return n;
1774}
1775
1776static void hashwalk_init(var *v, xhash *array)
1777{
1778 hash_item *hi;
1779 unsigned i;
1780 walker_list *w;
1781 walker_list *prev_walker;
1782
1783 if (v->type & VF_WALK) {
1784 prev_walker = v->x.walker;
1785 } else {
1786 v->type |= VF_WALK;
1787 prev_walker = NULL;
1788 }
1789 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1790
1791 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1);
1792 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1793 w->cur = w->end = w->wbuf;
1794 w->prev = prev_walker;
1795 for (i = 0; i < array->csize; i++) {
1796 hi = array->items[i];
1797 while (hi) {
1798 strcpy(w->end, hi->name);
1799 nextword(&w->end);
1800 hi = hi->next;
1801 }
1802 }
1803}
1804
1805static int hashwalk_next(var *v)
1806{
1807 walker_list *w = v->x.walker;
1808
1809 if (w->cur >= w->end) {
1810 walker_list *prev_walker = w->prev;
1811
1812 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1813 free(w);
1814 v->x.walker = prev_walker;
1815 return FALSE;
1816 }
1817
1818 setvar_s(v, nextword(&w->cur));
1819 return TRUE;
1820}
1821
1822
1823static int ptest(node *pattern)
1824{
1825
1826 return istrue(evaluate(pattern, &G.ptest__v));
1827}
1828
1829
/*
 * Read the next record from stream rsm into variable v.
 *
 * The stream keeps a buffer (rsm->buffer, capacity rsm->size); unread data
 * starts at offset rsm->adv and is rsm->pos bytes long.  The record
 * separator comes from rsplitter: a regex, a single character, or (when
 * that character is '\0') runs of blank lines.
 *
 * On success v receives the record text and RT receives the text of the
 * separator that ended it.
 *
 * Returns 1 if a record was read, 0 if no data is left, and -1 if the
 * read failed (ERRNO is set from errno in that case).
 */
1830 static int awk_getline(rstream *rsm, var *v)
1831 {
1832 char *b;
1833 regmatch_t pmatch[2];
1834 int size, a, p, pp = 0;
1835 int fd, so, eo, r, rp;
1836 char c, *m, *s;
1837
1838 debug_printf_eval("entered %s()\n", __func__);
1839
1840
1841
1842
	/* m: buffer base, a: offset of unread data, p: amount of unread data,
	 * pp: value of p before the latest read, rp: offset (relative to b)
	 * where the record text starts */
1843 fd = fileno(rsm->F);
1844 m = rsm->buffer;
1845 a = rsm->adv;
1846 p = rsm->pos;
1847 size = rsm->size;
1848 c = (char) rsplitter.n.info;
1849 rp = 0;
1850
1851 if (!m)
1852 m = qrealloc(m, 256, &size);
1853
1854 do {
1855 b = m + a;
	/* so/eo: start and end of the separator; default is "none, at the
	 * end of the data" */
1856 so = eo = p;
1857 r = 1;
1858 if (p > 0) {
1859 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1860 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1861 b, 1, pmatch, 0) == 0) {
1862 so = pmatch[0].rm_so;
1863 eo = pmatch[0].rm_eo;
	/* a match that reaches the end of the buffered data might grow with
	 * more input, so it is accepted only if data follows it */
1864 if (b[eo] != '\0')
1865 break;
1866 }
1867 } else if (c != '\0') {
	/* single-character separator; only data added by the latest read
	 * (from offset pp) is searched; an embedded NUL also ends a record */
1868 s = strchr(b+pp, c);
1869 if (!s)
1870 s = memchr(b+pp, '\0', p - pp);
1871 if (s) {
1872 so = eo = s-b;
1873 eo++;
1874 break;
1875 }
1876 } else {
	/* separator character is '\0': skip leading newlines, then the
	 * record ends at the first "\n\n"; all following newlines belong
	 * to the separator */
1877 while (b[rp] == '\n')
1878 rp++;
1879 s = strstr(b+rp, "\n\n");
1880 if (s) {
1881 so = eo = s-b;
1882 while (b[eo] == '\n')
1883 eo++;
1884 if (b[eo] != '\0')
1885 break;
1886 }
1887 }
1888 }
1889
	/* no complete record yet: move unread data to the buffer start ... */
1890 if (a > 0) {
1891 memmove(m, m+a, p+1);
1892 b = m;
1893 a = 0;
1894 }
1895
	/* ... make room and read more */
1896 m = qrealloc(m, a+p+128, &size);
1897 b = m + a;
1898 pp = p;
1899 p += safe_read(fd, b+p, size-p-1);
	/* p shrinks only when safe_read() returned a negative value */
1900 if (p < pp) {
1901 p = 0;
1902 r = 0;
1903 setvar_i(intvar[ERRNO], errno);
1904 }
1905 b[p] = '\0';
1906
	/* stop when the read added nothing (end of input or error) */
1907 } while (p > pp);
1908
1909 if (p == 0) {
	/* nothing buffered: r becomes 0 at end of input, -1 after an error */
1910 r--;
1911 } else {
	/* temporarily NUL-terminate the record, then the separator, to copy
	 * them out; the overwritten bytes are restored afterwards */
1912 c = b[so]; b[so] = '\0';
1913 setvar_s(v, b+rp);
1914 v->type |= VF_USER;
1915 b[so] = c;
1916 c = b[eo]; b[eo] = '\0';
1917 setvar_s(intvar[RT], b+so);
1918 b[eo] = c;
1919 }
1920
	/* save the buffer state, with the consumed record skipped */
1921 rsm->buffer = m;
1922 rsm->adv = a + eo;
1923 rsm->pos = p - eo;
1924 rsm->size = size;
1925
1926 debug_printf_eval("returning from %s(): %d\n", __func__, r);
1927
1928 return r;
1929 }
1930
1931static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1932{
1933 int r = 0;
1934 char c;
1935 const char *s = format;
1936
1937 if (int_as_int && n == (int)n) {
1938 r = snprintf(b, size, "%d", (int)n);
1939 } else {
1940 do { c = *s; } while (c && *++s);
1941 if (strchr("diouxX", c)) {
1942 r = snprintf(b, size, format, (int)n);
1943 } else if (strchr("eEfgG", c)) {
1944 r = snprintf(b, size, format, n);
1945 } else {
1946 syntax_error(EMSG_INV_FMT);
1947 }
1948 }
1949 return r;
1950}
1951
1952
1953static char *awk_printf(node *n)
1954{
1955 char *b = NULL;
1956 char *fmt, *s, *f;
1957 const char *s1;
1958 int i, j, incr, bsize;
1959 char c, c1;
1960 var *v, *arg;
1961
1962 v = nvalloc(1);
1963 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1964
1965 i = 0;
1966 while (*f) {
1967 s = f;
1968 while (*f && (*f != '%' || *++f == '%'))
1969 f++;
1970 while (*f && !isalpha(*f)) {
1971 if (*f == '*')
1972 syntax_error("%*x formats are not supported");
1973 f++;
1974 }
1975
1976 incr = (f - s) + MAXVARFMT;
1977 b = qrealloc(b, incr + i, &bsize);
1978 c = *f;
1979 if (c != '\0')
1980 f++;
1981 c1 = *f;
1982 *f = '\0';
1983 arg = evaluate(nextarg(&n), v);
1984
1985 j = i;
1986 if (c == 'c' || !c) {
1987 i += sprintf(b+i, s, is_numeric(arg) ?
1988 (char)getvar_i(arg) : *getvar_s(arg));
1989 } else if (c == 's') {
1990 s1 = getvar_s(arg);
1991 b = qrealloc(b, incr+i+strlen(s1), &bsize);
1992 i += sprintf(b+i, s, s1);
1993 } else {
1994 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1995 }
1996 *f = c1;
1997
1998
1999 if (i < j)
2000 i = j;
2001 }
2002
2003 free(fmt);
2004 nvfree(v);
2005 b = xrealloc(b, i + 1);
2006 b[i] = '\0';
2007 return b;
2008}
2009
2010
2011
2012
2013
2014
2015
2016
/*
 * Regex substitution.
 *
 * rn     - node giving the regex (see as_regex())
 * repl   - replacement text
 * nm     - number of the match to replace: the loop replaces a match once
 *          the running match count reaches nm and stops right after match
 *          number nm; with nm == 0 the stop condition never holds, so
 *          every match is replaced
 * src    - variable holding the input, or NULL to use $0
 * dest   - variable receiving the result, or NULL to use $0
 * subexp - non-zero to let digits 0-9 in repl refer to subexpressions
 *
 * In repl, '&' inserts the whole match; backslashes in front of '&'
 * (or of a digit when subexp is set) control whether it is taken literally.
 *
 * Returns the number of matches counted.
 */
2017 static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2018 {
2019 char *resbuf;
2020 const char *sp;
2021 int match_no, residx, replen, resbufsize;
2022 int regexec_flags;
2023 regmatch_t pmatch[10];
2024 regex_t sreg, *regex;
2025
2026 resbuf = NULL;
2027 residx = 0;
2028 match_no = 0;
2029 regexec_flags = 0;
2030 regex = as_regex(rn, &sreg);
2031 sp = getvar_s(src ? src : intvar[F0]);
2032 replen = strlen(repl);
2033 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2034 int so = pmatch[0].rm_so;
2035 int eo = pmatch[0].rm_eo;
2036
2037
	/* copy the input up to the end of this match; the matched part is
	 * dropped again below if the match is to be replaced */
2038 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2039 memcpy(resbuf + residx, sp, eo);
2040 residx += eo;
2041 if (++match_no >= nm) {
2042 const char *s;
2043 int nbs;
2044
2045
	/* rewind over the matched text and emit the replacement instead */
2046 residx -= (eo - so);
	/* nbs counts the backslashes immediately preceding the current char */
2047 nbs = 0;
2048 for (s = repl; *s; s++) {
2049 char c = resbuf[residx++] = *s;
2050 if (c == '\\') {
2051 nbs++;
2052 continue;
2053 }
2054 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2055 int j;
	/* take back the character just copied plus part of the
	 * backslash run in front of it */
2056 residx -= ((nbs + 3) >> 1);
2057 j = 0;
2058 if (c != '&') {
2059 j = c - '0';
2060 nbs++;
2061 }
2062 if (nbs % 2) {
	/* odd count: the character is literal */
2063 resbuf[residx++] = c;
2064 } else {
	/* even count: insert the text matched by group j (0 = whole match) */
2065 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2066 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2067 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2068 residx += n;
2069 }
2070 }
2071 nbs = 0;
2072 }
2073 }
2074
	/* later searches start in the middle of the input, so '^' must not
	 * match at their start */
2075 regexec_flags = REG_NOTBOL;
2076 sp += eo;
2077 if (match_no == nm)
2078 break;
2079 if (eo == so) {
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
	/* empty match: copy one input character and step over it so the
	 * loop makes progress; at the end of the input the copied NUL
	 * terminates the result */
2090 resbuf[residx] = *sp;
2091 if (*sp == '\0')
2092 goto ret;
2093 sp++;
2094 residx++;
2095 }
2096 }
2097
	/* append the rest of the input after the last match */
2098 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2099 strcpy(resbuf + residx, sp);
2100 ret:
2101
2102 setvar_p(dest ? dest : intvar[F0], resbuf);
	/* free the regex only if as_regex() compiled it into our local sreg */
2103 if (regex == &sreg)
2104 regfree(regex);
2105 return match_no;
2106 }
2107
/*
 * Convert a date string of the form "YYYY MM DD HH MM SS [DST]" to
 * seconds since the epoch using mktime().
 *
 * Returns -1 if fewer than six fields could be read, if the month is
 * less than 1, or if the year is less than 1900.  The optional seventh
 * field sets tm_isdst, which otherwise stays at -1.
 */
static NOINLINE int do_mktime(const char *ds)
{
	struct tm tm_buf;
	int nfields;

	/*memset(&tm_buf, 0, sizeof(tm_buf)); - not needed */
	tm_buf.tm_isdst = -1; /* default is unknown */

	/* the struct tm fields are ints, so sscanf can store into them directly */
	nfields = sscanf(ds, "%u %u %u %u %u %u %d",
		&tm_buf.tm_year, &tm_buf.tm_mon, &tm_buf.tm_mday,
		&tm_buf.tm_hour, &tm_buf.tm_min, &tm_buf.tm_sec,
		&tm_buf.tm_isdst);

	if (nfields < 6)
		return -1;
	if ((unsigned)tm_buf.tm_mon < 1)
		return -1;
	if ((unsigned)tm_buf.tm_year < 1900)
		return -1;

	/* struct tm counts months from 0 and years from 1900 */
	tm_buf.tm_mon--;
	tm_buf.tm_year -= 1900;

	return mktime(&tm_buf);
}
2135
/*
 * Execute the builtin function described by node op and store the
 * result in res (which is also returned).
 *
 * Up to four arguments are taken from the list at op->l.n.  Flag bits in
 * op->info, examined through the masks 0x09000000 and 0x08000000 and
 * shifted right by one for each successive argument, select whether an
 * argument is evaluated (av[i]) and whether its string value is fetched
 * (as[i]).  The top two bits of op->info hold the minimum argument count.
 * The function itself is selected by op->info & OPNMASK.
 */
2136 static NOINLINE var *exec_builtin(node *op, var *res)
2137 {
2138 #define tspl (G.exec_builtin__tspl)
2139
2140 var *tv;
2141 node *an[4];
2142 var *av[4];
2143 const char *as[4];
2144 regmatch_t pmatch[2];
2145 regex_t sreg, *re;
2146 node *spl;
2147 uint32_t isr, info;
2148 int nargs;
2149 time_t tt;
2150 int i, l, ll, n;
2151
	/* tv: one temporary per possible argument */
2152 tv = nvalloc(4);
2153 isr = info = op->info;
2154 op = op->l.n;
2155
2156 av[2] = av[3] = NULL;
2157 for (i = 0; i < 4 && op; i++) {
2158 an[i] = nextarg(&op);
2159 if (isr & 0x09000000)
2160 av[i] = evaluate(an[i], &tv[i]);
2161 if (isr & 0x08000000)
2162 as[i] = getvar_s(av[i]);
2163 isr >>= 1;
2164 }
2165
2166 nargs = i;
2167 if ((uint32_t)nargs < (info >> 30))
2168 syntax_error(EMSG_TOO_FEW_ARGS);
2169
2170 info &= OPNMASK;
2171 switch (info) {
2172
	/* atan2(arg0, arg1); needs math library support */
2173 case B_a2:
2174 if (ENABLE_FEATURE_AWK_LIBM)
2175 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2176 else
2177 syntax_error(EMSG_NO_MATH);
2178 break;
2179
	/* split arg0 into array arg1 (elements 1..n); returns n.  The optional
	 * third argument is the separator: a regexp node is used as is, any
	 * other expression is turned into a splitter; without it the field
	 * splitter is used */
2180 case B_sp: {
2181 char *s, *s1;
2182
2183 if (nargs > 2) {
2184 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2185 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2186 } else {
2187 spl = &fsplitter.n;
2188 }
2189
2190 n = awk_split(as[0], spl, &s);
2191 s1 = s;
2192 clear_array(iamarray(av[1]));
2193 for (i = 1; i <= n; i++)
2194 setari_u(av[1], i, nextword(&s));
2195 free(s1);
2196 setvar_i(res, n);
2197 break;
2198 }
2199
	/* substring of arg0 starting at 1-based position arg1 (clamped to
	 * the string), at most arg2 characters, or the rest if arg2 is absent */
2200 case B_ss: {
2201 char *s;
2202
2203 l = strlen(as[0]);
2204 i = getvar_i(av[1]) - 1;
2205 if (i > l)
2206 i = l;
2207 if (i < 0)
2208 i = 0;
2209 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2210 if (n < 0)
2211 n = 0;
2212 s = xstrndup(as[0]+i, n);
2213 setvar_p(res, s);
2214 break;
2215 }
2216
2217
2218
	/* bitwise operations on the integer values of the arguments */
2219 case B_an:
2220 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2221 break;
2222
2223 case B_co:
2224 setvar_i(res, ~getvar_i_int(av[0]));
2225 break;
2226
2227 case B_ls:
2228 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2229 break;
2230
2231 case B_or:
2232 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2233 break;
2234
2235 case B_rs:
2236 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2237 break;
2238
2239 case B_xo:
2240 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2241 break;
2242
	/* lower-case / upper-case copy of arg0; only ASCII letters change */
2243 case B_lo:
2244 case B_up: {
2245 char *s, *s1;
2246 s1 = s = xstrdup(as[0]);
2247 while (*s1) {
2248
	/* true exactly for 'a'..'z' and 'A'..'Z' */
2249 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2250 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2251 s1++;
2252 }
2253 setvar_p(res, s);
2254 break;
2255 }
2256
	/* 1-based position of arg1 within arg0, 0 if absent or arg1 is empty;
	 * compared case-insensitively when icase is set */
2257 case B_ix:
2258 n = 0;
2259 ll = strlen(as[1]);
2260 l = strlen(as[0]) - ll;
2261 if (ll > 0 && l >= 0) {
2262 if (!icase) {
2263 char *s = strstr(as[0], as[1]);
2264 if (s)
2265 n = (s - as[0]) + 1;
2266 } else {
2267
2268
2269
2270 for (i = 0; i <= l; i++) {
2271 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2272 n = i+1;
2273 break;
2274 }
2275 }
2276 }
2277 }
2278 setvar_i(res, n);
2279 break;
2280
	/* strftime() of time arg1 (default: now) with format arg0
	 * (default: "%a %b %d %H:%M:%S %Z %Y"), in local time */
2281 case B_ti:
2282 if (nargs > 1)
2283 tt = getvar_i(av[1]);
2284 else
2285 time(&tt);
2286
2287 i = strftime(g_buf, MAXVARFMT,
2288 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2289 localtime(&tt));
2290 g_buf[i] = '\0';
2291 setvar_s(res, g_buf);
2292 break;
2293
	/* date string to timestamp, see do_mktime() */
2294 case B_mt:
2295 setvar_i(res, do_mktime(as[0]));
2296 break;
2297
	/* regex match of arg1 against arg0: sets RSTART and RLENGTH and
	 * returns the 1-based match position; on failure RSTART is 0 and
	 * RLENGTH is -1 */
2298 case B_ma:
2299 re = as_regex(an[1], &sreg);
2300 n = regexec(re, as[0], 1, pmatch, 0);
2301 if (n == 0) {
2302 pmatch[0].rm_so++;
2303 pmatch[0].rm_eo++;
2304 } else {
2305 pmatch[0].rm_so = 0;
2306 pmatch[0].rm_eo = -1;
2307 }
2308 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2309 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2310 setvar_i(res, pmatch[0].rm_so);
2311 if (re == &sreg)
2312 regfree(re);
2313 break;
2314
	/* substitution with subexpression references enabled; arg2 selects
	 * the match, arg3 is the source (NULL means $0); result goes to res */
2315 case B_ge:
2316 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2317 break;
2318
	/* replace every match in arg2 (or $0) in place; returns the count */
2319 case B_gs:
2320 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2321 break;
2322
	/* replace the first match in arg2 (or $0) in place; returns the count */
2323 case B_su:
2324 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2325 break;
2326 }
2327
2328 nvfree(tv);
2329 return res;
2330 #undef tspl
2331 }
2332
2333
2334
2335
2336
2337#define XC(n) ((n) >> 8)
2338
2339static var *evaluate(node *op, var *res)
2340{
2341
2342#define fnargs (G.evaluate__fnargs)
2343
2344#define seed (G.evaluate__seed)
2345#define sreg (G.evaluate__sreg)
2346
2347 var *v1;
2348
2349 if (!op)
2350 return setvar_s(res, NULL);
2351
2352 debug_printf_eval("entered %s()\n", __func__);
2353
2354 v1 = nvalloc(2);
2355
2356 while (op) {
2357 struct {
2358 var *v;
2359 const char *s;
2360 } L = L;
2361 struct {
2362 var *v;
2363 const char *s;
2364 } R = R;
2365 double L_d = L_d;
2366 uint32_t opinfo;
2367 int opn;
2368 node *op1;
2369
2370 opinfo = op->info;
2371 opn = (opinfo & OPNMASK);
2372 g_lineno = op->lineno;
2373 op1 = op->l.n;
2374 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2375
2376
2377 if (opinfo & OF_RES1)
2378 L.v = evaluate(op1, v1);
2379 if (opinfo & OF_RES2)
2380 R.v = evaluate(op->r.n, v1+1);
2381 if (opinfo & OF_STR1) {
2382 L.s = getvar_s(L.v);
2383 debug_printf_eval("L.s:'%s'\n", L.s);
2384 }
2385 if (opinfo & OF_STR2) {
2386 R.s = getvar_s(R.v);
2387 debug_printf_eval("R.s:'%s'\n", R.s);
2388 }
2389 if (opinfo & OF_NUM1) {
2390 L_d = getvar_i(L.v);
2391 debug_printf_eval("L_d:%f\n", L_d);
2392 }
2393
2394 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2395 switch (XC(opinfo & OPCLSMASK)) {
2396
2397
2398
2399
2400 case XC( OC_TEST ):
2401 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2402
2403 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2404 op->info |= OF_CHECKED;
2405 if (ptest(op1->r.n))
2406 op->info &= ~OF_CHECKED;
2407 op = op->a.n;
2408 } else {
2409 op = op->r.n;
2410 }
2411 } else {
2412 op = ptest(op1) ? op->a.n : op->r.n;
2413 }
2414 break;
2415
2416
2417 case XC( OC_EXEC ):
2418 break;
2419
2420
2421 case XC( OC_BR ):
2422 op = istrue(L.v) ? op->a.n : op->r.n;
2423 break;
2424
2425
2426 case XC( OC_WALKINIT ):
2427 hashwalk_init(L.v, iamarray(R.v));
2428 break;
2429
2430
2431 case XC( OC_WALKNEXT ):
2432 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2433 break;
2434
2435 case XC( OC_PRINT ):
2436 case XC( OC_PRINTF ): {
2437 FILE *F = stdout;
2438
2439 if (op->r.n) {
2440 rstream *rsm = newfile(R.s);
2441 if (!rsm->F) {
2442 if (opn == '|') {
2443 rsm->F = popen(R.s, "w");
2444 if (rsm->F == NULL)
2445 bb_perror_msg_and_die("popen");
2446 rsm->is_pipe = 1;
2447 } else {
2448 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2449 }
2450 }
2451 F = rsm->F;
2452 }
2453
2454 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2455 if (!op1) {
2456 fputs(getvar_s(intvar[F0]), F);
2457 } else {
2458 while (op1) {
2459 var *v = evaluate(nextarg(&op1), v1);
2460 if (v->type & VF_NUMBER) {
2461 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2462 getvar_i(v), TRUE);
2463 fputs(g_buf, F);
2464 } else {
2465 fputs(getvar_s(v), F);
2466 }
2467
2468 if (op1)
2469 fputs(getvar_s(intvar[OFS]), F);
2470 }
2471 }
2472 fputs(getvar_s(intvar[ORS]), F);
2473
2474 } else {
2475 char *s = awk_printf(op1);
2476 fputs(s, F);
2477 free(s);
2478 }
2479 fflush(F);
2480 break;
2481 }
2482
2483 case XC( OC_DELETE ): {
2484 uint32_t info = op1->info & OPCLSMASK;
2485 var *v;
2486
2487 if (info == OC_VAR) {
2488 v = op1->l.v;
2489 } else if (info == OC_FNARG) {
2490 v = &fnargs[op1->l.aidx];
2491 } else {
2492 syntax_error(EMSG_NOT_ARRAY);
2493 }
2494
2495 if (op1->r.n) {
2496 const char *s;
2497 clrvar(L.v);
2498 s = getvar_s(evaluate(op1->r.n, v1));
2499 hash_remove(iamarray(v), s);
2500 } else {
2501 clear_array(iamarray(v));
2502 }
2503 break;
2504 }
2505
2506 case XC( OC_NEWSOURCE ):
2507 g_progname = op->l.new_progname;
2508 break;
2509
2510 case XC( OC_RETURN ):
2511 copyvar(res, L.v);
2512 break;
2513
2514 case XC( OC_NEXTFILE ):
2515 nextfile = TRUE;
2516 case XC( OC_NEXT ):
2517 nextrec = TRUE;
2518 case XC( OC_DONE ):
2519 clrvar(res);
2520 break;
2521
2522 case XC( OC_EXIT ):
2523 awk_exit(L_d);
2524
2525
2526
2527 case XC( OC_VAR ):
2528 L.v = op->l.v;
2529 if (L.v == intvar[NF])
2530 split_f0();
2531 goto v_cont;
2532
2533 case XC( OC_FNARG ):
2534 L.v = &fnargs[op->l.aidx];
2535 v_cont:
2536 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2537 break;
2538
2539 case XC( OC_IN ):
2540 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2541 break;
2542
2543 case XC( OC_REGEXP ):
2544 op1 = op;
2545 L.s = getvar_s(intvar[F0]);
2546 goto re_cont;
2547
2548 case XC( OC_MATCH ):
2549 op1 = op->r.n;
2550 re_cont:
2551 {
2552 regex_t *re = as_regex(op1, &sreg);
2553 int i = regexec(re, L.s, 0, NULL, 0);
2554 if (re == &sreg)
2555 regfree(re);
2556 setvar_i(res, (i == 0) ^ (opn == '!'));
2557 }
2558 break;
2559
2560 case XC( OC_MOVE ):
2561 debug_printf_eval("MOVE\n");
2562
2563
2564
2565
2566
2567
2568
2569 res = copyvar(L.v, R.v);
2570
2571 break;
2572
2573 case XC( OC_TERNARY ):
2574 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2575 syntax_error(EMSG_POSSIBLE_ERROR);
2576 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2577 break;
2578
2579 case XC( OC_FUNC ): {
2580 var *vbeg, *v;
2581 const char *sv_progname;
2582
2583 if (!op->r.f->body.first)
2584 syntax_error(EMSG_UNDEF_FUNC);
2585
2586 vbeg = v = nvalloc(op->r.f->nargs + 1);
2587 while (op1) {
2588 var *arg = evaluate(nextarg(&op1), v1);
2589 copyvar(v, arg);
2590 v->type |= VF_CHILD;
2591 v->x.parent = arg;
2592 if (++v - vbeg >= op->r.f->nargs)
2593 break;
2594 }
2595
2596 v = fnargs;
2597 fnargs = vbeg;
2598 sv_progname = g_progname;
2599
2600 res = evaluate(op->r.f->body.first, res);
2601
2602 g_progname = sv_progname;
2603 nvfree(fnargs);
2604 fnargs = v;
2605
2606 break;
2607 }
2608
2609 case XC( OC_GETLINE ):
2610 case XC( OC_PGETLINE ): {
2611 rstream *rsm;
2612 int i;
2613
2614 if (op1) {
2615 rsm = newfile(L.s);
2616 if (!rsm->F) {
2617 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2618 rsm->F = popen(L.s, "r");
2619 rsm->is_pipe = TRUE;
2620 } else {
2621 rsm->F = fopen_for_read(L.s);
2622 }
2623 }
2624 } else {
2625 if (!iF)
2626 iF = next_input_file();
2627 rsm = iF;
2628 }
2629
2630 if (!rsm->F) {
2631 setvar_i(intvar[ERRNO], errno);
2632 setvar_i(res, -1);
2633 break;
2634 }
2635
2636 if (!op->r.n)
2637 R.v = intvar[F0];
2638
2639 i = awk_getline(rsm, R.v);
2640 if (i > 0 && !op1) {
2641 incvar(intvar[FNR]);
2642 incvar(intvar[NR]);
2643 }
2644 setvar_i(res, i);
2645 break;
2646 }
2647
2648
2649 case XC( OC_FBLTIN ): {
2650 double R_d = R_d;
2651
2652 switch (opn) {
2653 case F_in:
2654 R_d = (int)L_d;
2655 break;
2656
2657 case F_rn:
2658 R_d = (double)rand() / (double)RAND_MAX;
2659 break;
2660
2661 case F_co:
2662 if (ENABLE_FEATURE_AWK_LIBM) {
2663 R_d = cos(L_d);
2664 break;
2665 }
2666
2667 case F_ex:
2668 if (ENABLE_FEATURE_AWK_LIBM) {
2669 R_d = exp(L_d);
2670 break;
2671 }
2672
2673 case F_lg:
2674 if (ENABLE_FEATURE_AWK_LIBM) {
2675 R_d = log(L_d);
2676 break;
2677 }
2678
2679 case F_si:
2680 if (ENABLE_FEATURE_AWK_LIBM) {
2681 R_d = sin(L_d);
2682 break;
2683 }
2684
2685 case F_sq:
2686 if (ENABLE_FEATURE_AWK_LIBM) {
2687 R_d = sqrt(L_d);
2688 break;
2689 }
2690
2691 syntax_error(EMSG_NO_MATH);
2692 break;
2693
2694 case F_sr:
2695 R_d = (double)seed;
2696 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2697 srand(seed);
2698 break;
2699
2700 case F_ti:
2701 R_d = time(NULL);
2702 break;
2703
2704 case F_le:
2705 if (!op1)
2706 L.s = getvar_s(intvar[F0]);
2707 R_d = strlen(L.s);
2708 break;
2709
2710 case F_sy:
2711 fflush_all();
2712 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2713 ? (system(L.s) >> 8) : 0;
2714 break;
2715
2716 case F_ff:
2717 if (!op1) {
2718 fflush(stdout);
2719 } else if (L.s && *L.s) {
2720 rstream *rsm = newfile(L.s);
2721 fflush(rsm->F);
2722 } else {
2723 fflush_all();
2724 }
2725 break;
2726
2727 case F_cl: {
2728 rstream *rsm;
2729 int err = 0;
2730 rsm = (rstream *)hash_search(fdhash, L.s);
2731 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2732 if (rsm) {
2733 debug_printf_eval("OC_FBLTIN F_cl "
2734 "rsm->is_pipe:%d, ->F:%p\n",
2735 rsm->is_pipe, rsm->F);
2736
2737
2738
2739
2740 if (rsm->F)
2741 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2742 free(rsm->buffer);
2743 hash_remove(fdhash, L.s);
2744 }
2745 if (err)
2746 setvar_i(intvar[ERRNO], errno);
2747 R_d = (double)err;
2748 break;
2749 }
2750 }
2751 setvar_i(res, R_d);
2752 break;
2753 }
2754
2755 case XC( OC_BUILTIN ):
2756 res = exec_builtin(op, res);
2757 break;
2758
2759 case XC( OC_SPRINTF ):
2760 setvar_p(res, awk_printf(op1));
2761 break;
2762
2763 case XC( OC_UNARY ): {
2764 double Ld, R_d;
2765
2766 Ld = R_d = getvar_i(R.v);
2767 switch (opn) {
2768 case 'P':
2769 Ld = ++R_d;
2770 goto r_op_change;
2771 case 'p':
2772 R_d++;
2773 goto r_op_change;
2774 case 'M':
2775 Ld = --R_d;
2776 goto r_op_change;
2777 case 'm':
2778 R_d--;
2779 r_op_change:
2780 setvar_i(R.v, R_d);
2781 break;
2782 case '!':
2783 Ld = !istrue(R.v);
2784 break;
2785 case '-':
2786 Ld = -R_d;
2787 break;
2788 }
2789 setvar_i(res, Ld);
2790 break;
2791 }
2792
2793 case XC( OC_FIELD ): {
2794 int i = (int)getvar_i(R.v);
2795 if (i == 0) {
2796 res = intvar[F0];
2797 } else {
2798 split_f0();
2799 if (i > nfields)
2800 fsrealloc(i);
2801 res = &Fields[i - 1];
2802 }
2803 break;
2804 }
2805
2806
2807 case XC( OC_CONCAT ):
2808 case XC( OC_COMMA ): {
2809 const char *sep = "";
2810 if ((opinfo & OPCLSMASK) == OC_COMMA)
2811 sep = getvar_s(intvar[SUBSEP]);
2812 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2813 break;
2814 }
2815
2816 case XC( OC_LAND ):
2817 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2818 break;
2819
2820 case XC( OC_LOR ):
2821 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2822 break;
2823
2824 case XC( OC_BINARY ):
2825 case XC( OC_REPLACE ): {
2826 double R_d = getvar_i(R.v);
2827 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2828 switch (opn) {
2829 case '+':
2830 L_d += R_d;
2831 break;
2832 case '-':
2833 L_d -= R_d;
2834 break;
2835 case '*':
2836 L_d *= R_d;
2837 break;
2838 case '/':
2839 if (R_d == 0)
2840 syntax_error(EMSG_DIV_BY_ZERO);
2841 L_d /= R_d;
2842 break;
2843 case '&':
2844 if (ENABLE_FEATURE_AWK_LIBM)
2845 L_d = pow(L_d, R_d);
2846 else
2847 syntax_error(EMSG_NO_MATH);
2848 break;
2849 case '%':
2850 if (R_d == 0)
2851 syntax_error(EMSG_DIV_BY_ZERO);
2852 L_d -= (int)(L_d / R_d) * R_d;
2853 break;
2854 }
2855 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2856 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2857 break;
2858 }
2859
2860 case XC( OC_COMPARE ): {
2861 int i = i;
2862 double Ld;
2863
2864 if (is_numeric(L.v) && is_numeric(R.v)) {
2865 Ld = getvar_i(L.v) - getvar_i(R.v);
2866 } else {
2867 const char *l = getvar_s(L.v);
2868 const char *r = getvar_s(R.v);
2869 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
2870 }
2871 switch (opn & 0xfe) {
2872 case 0:
2873 i = (Ld > 0);
2874 break;
2875 case 2:
2876 i = (Ld >= 0);
2877 break;
2878 case 4:
2879 i = (Ld == 0);
2880 break;
2881 }
2882 setvar_i(res, (i == 0) ^ (opn & 1));
2883 break;
2884 }
2885
2886 default:
2887 syntax_error(EMSG_POSSIBLE_ERROR);
2888 }
2889 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2890 op = op->a.n;
2891 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2892 break;
2893 if (nextrec)
2894 break;
2895 }
2896
2897 nvfree(v1);
2898 debug_printf_eval("returning from %s(): %p\n", __func__, res);
2899 return res;
2900#undef fnargs
2901#undef seed
2902#undef sreg
2903}
2904
2905
2906
2907
2908static int awk_exit(int r)
2909{
2910 var tv;
2911 unsigned i;
2912 hash_item *hi;
2913
2914 zero_out_var(&tv);
2915
2916 if (!exiting) {
2917 exiting = TRUE;
2918 nextrec = FALSE;
2919 evaluate(endseq.first, &tv);
2920 }
2921
2922
2923 for (i = 0; i < fdhash->csize; i++) {
2924 hi = fdhash->items[i];
2925 while (hi) {
2926 if (hi->data.rs.F && hi->data.rs.is_pipe)
2927 pclose(hi->data.rs.F);
2928 hi = hi->next;
2929 }
2930 }
2931
2932 exit(r);
2933}
2934
2935
2936
2937static int is_assignment(const char *expr)
2938{
2939 char *exprc, *val, *s, *s1;
2940
2941 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
2942 return FALSE;
2943 }
2944
2945 exprc = xstrdup(expr);
2946 val = exprc + (val - expr);
2947 *val++ = '\0';
2948
2949 s = s1 = val;
2950 while ((*s1 = nextchar(&s)) != '\0')
2951 s1++;
2952
2953 setvar_u(newvar(exprc), val);
2954 free(exprc);
2955 return TRUE;
2956}
2957
2958
2959static rstream *next_input_file(void)
2960{
2961#define rsm (G.next_input_file__rsm)
2962#define files_happen (G.next_input_file__files_happen)
2963
2964 FILE *F = NULL;
2965 const char *fname, *ind;
2966
2967 if (rsm.F)
2968 fclose(rsm.F);
2969 rsm.F = NULL;
2970 rsm.pos = rsm.adv = 0;
2971
2972 do {
2973 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2974 if (files_happen)
2975 return NULL;
2976 fname = "-";
2977 F = stdin;
2978 } else {
2979 ind = getvar_s(incvar(intvar[ARGIND]));
2980 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2981 if (fname && *fname && !is_assignment(fname))
2982 F = xfopen_stdin(fname);
2983 }
2984 } while (!F);
2985
2986 files_happen = TRUE;
2987 setvar_s(intvar[FILENAME], fname);
2988 rsm.F = F;
2989 return &rsm;
2990#undef rsm
2991#undef files_happen
2992}
2993
2994int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2995int awk_main(int argc, char **argv)
2996{
2997 unsigned opt;
2998 char *opt_F, *opt_W;
2999 llist_t *list_v = NULL;
3000 llist_t *list_f = NULL;
3001 int i, j;
3002 var *v;
3003 var tv;
3004 char **envp;
3005 char *vnames = (char *)vNames;
3006 char *vvalues = (char *)vValues;
3007
3008 INIT_G();
3009
3010
3011
3012 if (ENABLE_LOCALE_SUPPORT)
3013 setlocale(LC_NUMERIC, "C");
3014
3015 zero_out_var(&tv);
3016
3017
3018 g_buf = xmalloc(MAXVARFMT + 1);
3019
3020 vhash = hash_init();
3021 ahash = hash_init();
3022 fdhash = hash_init();
3023 fnhash = hash_init();
3024
3025
3026 for (i = 0; *vnames; i++) {
3027 intvar[i] = v = newvar(nextword(&vnames));
3028 if (*vvalues != '\377')
3029 setvar_s(v, nextword(&vvalues));
3030 else
3031 setvar_i(v, 0);
3032
3033 if (*vnames == '*') {
3034 v->type |= VF_SPECIAL;
3035 vnames++;
3036 }
3037 }
3038
3039 handle_special(intvar[FS]);
3040 handle_special(intvar[RS]);
3041
3042 newfile("/dev/stdin")->F = stdin;
3043 newfile("/dev/stdout")->F = stdout;
3044 newfile("/dev/stderr")->F = stderr;
3045
3046
3047 if (environ) for (envp = environ; *envp; envp++) {
3048
3049 char *s = *envp;
3050 char *s1 = strchr(s, '=');
3051 if (s1) {
3052 *s1 = '\0';
3053
3054
3055 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3056 *s1 = '=';
3057 }
3058 }
3059 opt_complementary = "v::f::";
3060 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
3061 argv += optind;
3062 argc -= optind;
3063 if (opt & 0x1)
3064 setvar_s(intvar[FS], opt_F);
3065 while (list_v) {
3066 if (!is_assignment(llist_pop(&list_v)))
3067 bb_show_usage();
3068 }
3069 if (list_f) {
3070 do {
3071 char *s = NULL;
3072 FILE *from_file;
3073
3074 g_progname = llist_pop(&list_f);
3075 from_file = xfopen_stdin(g_progname);
3076
3077 for (i = j = 1; j > 0; i += j) {
3078 s = xrealloc(s, i + 4096);
3079 j = fread(s + i, 1, 4094, from_file);
3080 }
3081 s[i] = '\0';
3082 fclose(from_file);
3083 parse_program(s + 1);
3084 free(s);
3085 } while (list_f);
3086 argc++;
3087 } else {
3088 if (!argc)
3089 bb_show_usage();
3090 g_progname = "cmd. line";
3091 parse_program(*argv++);
3092 }
3093 if (opt & 0x8)
3094 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
3095
3096
3097 setvar_i(intvar[ARGC], argc);
3098 setari_u(intvar[ARGV], 0, "awk");
3099 i = 0;
3100 while (*argv)
3101 setari_u(intvar[ARGV], ++i, *argv++);
3102
3103 evaluate(beginseq.first, &tv);
3104 if (!mainseq.first && !endseq.first)
3105 awk_exit(EXIT_SUCCESS);
3106
3107
3108 if (!iF)
3109 iF = next_input_file();
3110
3111
3112 while (iF) {
3113 nextfile = FALSE;
3114 setvar_i(intvar[FNR], 0);
3115
3116 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3117 nextrec = FALSE;
3118 incvar(intvar[NR]);
3119 incvar(intvar[FNR]);
3120 evaluate(mainseq.first, &tv);
3121
3122 if (nextfile)
3123 break;
3124 }
3125
3126 if (i < 0)
3127 syntax_error(strerror(errno));
3128
3129 iF = next_input_file();
3130 }
3131
3132 awk_exit(EXIT_SUCCESS);
3133
3134}
3135