1
2
3
4
5
6
7
8
9
10#include "libbb.h"
11#include "xregex.h"
12#include <math.h>
13
14
15
16
/* Output size limit passed to fmt_num() when getvar_s() formats a number into g_buf */
#define MAXVARFMT 240
/* Smallest number of var slots nvalloc() puts into a newly allocated nvblock */
#define MINNVBLOCK 64
19
20
/* Bits of var.type */
#define VF_NUMBER 0x0001 /* set by setvar_i(): 'number' holds the value */
#define VF_ARRAY 0x0002 /* x.array points to a hash (set by iamarray()) */

#define VF_CACHED 0x0100 /* getvar_s()/getvar_i() already stored the converted form */
#define VF_USER 0x0200 /* set by setvar_u(); getvar_i() drops it if the string is not wholly numeric */
#define VF_SPECIAL 0x0400 /* given to every Fields[] slot by fsrealloc() */
#define VF_WALK 0x0800 /* x.walker is allocated; nvfree() releases it */
#define VF_FSTR 0x1000 /* 'string' is not free()d by clrvar() */
#define VF_CHILD 0x2000 /* x.parent links to another var; iamarray() follows the chain */
#define VF_DIRTY 0x4000 /* set by clrvar() each time the value is reset */

/* Flags that survive clrvar() and are never copied by copyvar() */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
34
35
/* A variable: flags plus numeric and string representations */
typedef struct var_s {
	unsigned type;            /* VF_* flags */
	double number;            /* numeric value (valid per VF_NUMBER / VF_CACHED) */
	char *string;             /* string value, NULL means empty */
	union {
		int aidx;                 /* function argument index (set in parse_program) */
		struct xhash_s *array;    /* array contents when VF_ARRAY is set */
		struct var_s *parent;     /* followed by iamarray() while VF_CHILD is set */
		char **walker;            /* freed by nvfree() when VF_WALK is set */
	} x;
} var;
47
48
/* A sequence of nodes linked through node.a.n (built by chain_node()) */
typedef struct chain_s {
	struct node_s *first;
	struct node_s *last;        /* node that the next chain_node() call will fill in */
	const char *programname;    /* g_progname last recorded for this chain */
} chain;
54
55
/* A user-defined function */
typedef struct func_s {
	unsigned nargs;         /* number of declared parameters */
	struct chain_s body;    /* statements of the function */
} func;
60
61
/* An input stream with its read buffer.
 * NOTE(review): the meaning of adv/size/pos is defined by the reader code,
 * which is outside this part of the file - confirm before relying on it. */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int adv;
	int size;
	int pos;
	smallint is_pipe;
} rstream;
70
/* One hash table entry. 'data' must stay the first member: hash_search()
 * and hash_find() return &data and hash_find() treats that address as the
 * item itself. */
typedef struct hash_item_s {
	union {
		struct var_s v;         /* variable / array element */
		struct rstream_s rs;    /* entry of the file hash (newfile) */
		struct func_s f;        /* entry of the function hash (newfunc) */
	} data;
	struct hash_item_s *next;   /* next item in the same bucket */
	char name[1];               /* key; hash_find() over-allocates to fit the whole string */
} hash_item;
80
/* Chained hash table */
typedef struct xhash_s {
	unsigned nel;       /* number of stored items */
	unsigned csize;     /* current number of buckets */
	unsigned nprime;    /* index of the next size to take from PRIMES[] */
	unsigned glen;      /* sum of (strlen(key) + 1) over all stored keys */
	struct hash_item_s **items;    /* bucket heads */
} xhash;
88
89
/* Parse tree / program chain node */
typedef struct node_s {
	uint32_t info;      /* opcode class, operand flags, priority (see OC_*, OF_*, P()) */
	unsigned lineno;    /* g_lineno at the time new_node() created it */
	union {             /* left operand */
		struct node_s *n;
		var *v;
		int i;
		char *s;
		regex_t *re;
	} l;
	union {             /* right operand */
		struct node_s *n;
		regex_t *ire;
		func *f;
		int argno;
	} r;
	union {             /* parent while parsing an expression, next node inside a chain */
		struct node_s *n;
	} a;
} node;
110
111
/* Block of temporary variables handed out by nvalloc() */
typedef struct nvblock_s {
	int size;                   /* number of var slots in nv[] */
	var *pos;                   /* first unused slot */
	struct nvblock_s *prev;
	struct nvblock_s *next;
	var nv[0];                  /* slots follow the header in the same allocation */
} nvblock;
119
/* A splitter: node plus storage for its two compiled regexes
 * (re[0] case-sensitive, re[1] compiled with REG_ICASE by mk_re_node()) */
typedef struct tsplitter_s {
	node n;
	regex_t re[2];
} tsplitter;
124
125
126
/*
 * Token classes. Bits 0..23 follow the order of the NTC-separated groups in
 * tokenlist[]: next_token() starts with class bit 1 and shifts it left at
 * every NTC. Classes from TC_EOF upwards are assigned directly by next_token().
 */
#define TC_SEQSTART 1            /* ( */
#define TC_SEQTERM (1 << 1)      /* ) */
#define TC_REGEXP (1 << 2)       /* /.../ */
#define TC_OUTRDR (1 << 3)       /* output redirection */
#define TC_UOPPOST (1 << 4)      /* postfix unary operator */
#define TC_UOPPRE1 (1 << 5)      /* prefix unary operator, first group */
#define TC_BINOPX (1 << 6)       /* binary operator */
#define TC_IN (1 << 7)           /* in */
#define TC_COMMA (1 << 8)        /* , */
#define TC_PIPE (1 << 9)         /* | */
#define TC_UOPPRE2 (1 << 10)     /* prefix unary operator, second group */
#define TC_ARRTERM (1 << 11)     /* ] */
#define TC_GRPSTART (1 << 12)    /* { */
#define TC_GRPTERM (1 << 13)     /* } */
#define TC_SEMICOL (1 << 14)     /* ; */
#define TC_NEWLINE (1 << 15)     /* newline */
#define TC_STATX (1 << 16)       /* statement keyword */
#define TC_WHILE (1 << 17)       /* while */
#define TC_ELSE (1 << 18)        /* else */
#define TC_BUILTIN (1 << 19)     /* builtin function name */
#define TC_GETLINE (1 << 20)     /* getline */
#define TC_FUNCDECL (1 << 21)    /* func / function */
#define TC_BEGIN (1 << 22)       /* BEGIN */
#define TC_END (1 << 23)         /* END */
#define TC_EOF (1 << 24)         /* end of program text */
#define TC_VARIABLE (1 << 25)    /* plain name */
#define TC_ARRAY (1 << 26)       /* name immediately followed by [ */
#define TC_FUNCTION (1 << 27)    /* name followed by ( */
#define TC_STRING (1 << 28)      /* "..." literal */
#define TC_NUMBER (1 << 29)      /* numeric literal */

/* any prefix unary operator */
#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
	| TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)

/* classes made of words: next_token() always tries them and refuses a match
 * that is only the prefix of a longer identifier */
#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
	| TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* when the previous token was one of these, next_token() skips a newline */
#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
	| TC_BINOP | TC_OPTERM)

/* tokens that can start an expression */
#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* tokens that chain_group() accepts at the start of a statement */
#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* next_token() inserts a concatenation operator when a TC_CONCAT1 token is
 * followed by a TC_CONCAT2 token and a binary operator is expected */
#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
	| TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
187
/* Operand flags stored in node.info.
 * NOTE(review): they are consumed by evaluate(), which is outside this part
 * of the file; the per-flag meaning is not visible here - confirm there. */
#define OF_RES1 0x010000
#define OF_RES2 0x020000
#define OF_STR1 0x040000
#define OF_STR2 0x080000
#define OF_NUM1 0x100000
#define OF_CHECKED 0x200000

/* Two-letter shorthands for OF_* combinations: first letter describes
 * operand 1 (OF_RES1/OF_STR1/OF_NUM1), second letter operand 2
 * (OF_RES2/OF_STR2); 'x' means no flag for that operand */
#define xx 0
#define xV OF_RES2
#define xS (OF_RES2 | OF_STR2)
#define Vx OF_RES1
#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx (OF_RES1 | OF_STR1)
#define SV (OF_RES1 | OF_STR1 | OF_RES2)
#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

/* node.info masks: opcode class (OC_* and ST_* values) and low sub-code bits */
#define OPCLSMASK 0xFF00
#define OPNMASK 0x007F
209
210
211
212
213
/*
 * Operator priority lives in the top byte of node.info / tokeninfo[] entries.
 * P() places a value there. The argument is parenthesized so that compound
 * expressions expand correctly, and it is converted to uint32_t before the
 * shift because values with bit 7 set (e.g. 0x83) do not fit a signed int
 * after shifting by 24.
 * PRIMASK selects the priority bits; PRIMASK2 is the same without the lowest
 * priority bit (parse_expr() compares a new operator's PRIMASK bits against
 * the PRIMASK2 bits of the operators already in the tree).
 */
#define P(x) ((uint32_t)(x) << 24)
#define PRIMASK 0x7F000000
#define PRIMASK2 0x7E000000
217
218
219
/* NOTE(review): these two thresholds on the opcode class are used outside
 * this part of the file; their exact role is not visible here - confirm. */
#define SHIFT_TIL_THIS 0x0600
#define RECUR_FROM_THIS 0x1000

/* Opcode classes (the OPCLSMASK part of node.info). ST_* values are statement
 * keywords that chain_group() expands into opcode chains. */
enum {
	OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
	OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,

	OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
	OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
	OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,

	OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
	OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
	OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
	OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
	OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
	OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
	OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
	OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
	OC_DONE = 0x2800,

	ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
	ST_WHILE = 0x3300
};
244
245
/* Sub-codes OR-ed into OC_FBLTIN entries of tokeninfo[]
 * (e.g. F_cl for "close", F_sy for "system", F_le for "length") */
enum {
	F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
	F_ti, F_le, F_sy, F_ff, F_cl
};

/* Sub-codes OR-ed into OC_BUILTIN entries of tokeninfo[]
 * (e.g. B_a2 for "atan2", B_ss for "substr", B_an for "and") */
enum {
	B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_lo, B_up,
	B_ge, B_gs, B_su,
	B_an, B_co, B_ls, B_or, B_rs, B_xo,
};
257
258
259
/* Marker that ends one token class inside tokenlist[] (string and char form) */
#define NTC "\377"
#define NTCC '\377'

#define OC_B OC_BUILTIN

/*
 * Table of fixed tokens. Every token is stored as one length byte followed by
 * its text; NTC closes a token class and a zero length byte ends the table.
 * Class order must match the TC_* bit order and token order must match
 * tokeninfo[], because next_token() walks both tables in step.
 */
static const char tokenlist[] ALIGN1 =
	"\1(" NTC                               /* TC_SEQSTART */
	"\1)" NTC                               /* TC_SEQTERM */
	"\1/" NTC                               /* TC_REGEXP */
	"\2>>" "\1>" "\1|" NTC                  /* TC_OUTRDR */
	"\2++" "\2--" NTC                       /* TC_UOPPOST */
	"\2++" "\2--" "\1$" NTC                 /* TC_UOPPRE1 */
	"\2==" "\1=" "\2+=" "\2-="              /* TC_BINOPX */
	"\2*=" "\2/=" "\2%=" "\2^="
	"\1+" "\1-" "\3**=" "\2**"
	"\1/" "\1%" "\1^" "\1*"
	"\2!=" "\2>=" "\2<=" "\1>"
	"\1<" "\2!~" "\1~" "\2&&"
	"\2||" "\1?" "\1:" NTC
	"\2in" NTC                              /* TC_IN */
	"\1," NTC                               /* TC_COMMA */
	"\1|" NTC                               /* TC_PIPE */
	"\1+" "\1-" "\1!" NTC                   /* TC_UOPPRE2 */
	"\1]" NTC                               /* TC_ARRTERM */
	"\1{" NTC                               /* TC_GRPSTART */
	"\1}" NTC                               /* TC_GRPTERM */
	"\1;" NTC                               /* TC_SEMICOL */
	"\1\n" NTC                              /* TC_NEWLINE */
	"\2if" "\2do" "\3for" "\5break"         /* TC_STATX */
	"\10continue" "\6delete" "\5print"
	"\6printf" "\4next" "\10nextfile"
	"\6return" "\4exit" NTC
	"\5while" NTC                           /* TC_WHILE */
	"\4else" NTC                            /* TC_ELSE */

	"\3and" "\5compl" "\6lshift" "\2or"     /* TC_BUILTIN */
	"\6rshift" "\3xor"
	"\5close" "\6system" "\6fflush" "\5atan2"
	"\3cos" "\3exp" "\3int" "\3log"
	"\4rand" "\3sin" "\4sqrt" "\5srand"
	"\6gensub" "\4gsub" "\5index" "\6length"
	"\5match" "\5split" "\7sprintf" "\3sub"
	"\6substr" "\7systime" "\10strftime"
	"\7tolower" "\7toupper" NTC
	"\7getline" NTC                         /* TC_GETLINE */
	"\4func" "\10function" NTC              /* TC_FUNCDECL */
	"\5BEGIN" NTC                           /* TC_BEGIN */
	"\3END" "\0"                            /* TC_END, then end of table */
	;
309
/*
 * node.info value for each token of tokenlist[], in the same order (one entry
 * per token, NTC markers have no entry). next_token() copies the entry of the
 * matched token into t_info.
 */
static const uint32_t tokeninfo[] = {
	0,                              /* TC_SEQSTART */
	0,                              /* TC_SEQTERM */
	OC_REGEXP,                      /* TC_REGEXP */
	xS|'a', xS|'w', xS|'|',         /* TC_OUTRDR */
	OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',     /* TC_UOPPOST */
	OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',     /* TC_UOPPRE1 */
	OC_FIELD|xV|P(5),
	OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),        /* TC_BINOPX */
	OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
	OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
	OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
	OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
	OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
	OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
	OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
	OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
	OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
	OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
	OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
	OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
	OC_COLON|xx|P(67)|':',
	OC_IN|SV|P(49),                 /* TC_IN */
	OC_COMMA|SS|P(80),              /* TC_COMMA */
	OC_PGETLINE|SV|P(37),           /* TC_PIPE */
	OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',   /* TC_UOPPRE2 */
	OC_UNARY|xV|P(19)|'!',
	0,                              /* TC_ARRTERM */
	0,                              /* TC_GRPSTART */
	0,                              /* TC_GRPTERM */
	0,                              /* TC_SEMICOL */
	0,                              /* TC_NEWLINE */
	ST_IF, ST_DO, ST_FOR, OC_BREAK, /* TC_STATX */
	OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
	OC_PRINTF, OC_NEXT, OC_NEXTFILE,
	OC_RETURN|Vx, OC_EXIT|Nx,
	ST_WHILE,                       /* TC_WHILE */
	0,                              /* TC_ELSE */

	/* TC_BUILTIN */
	OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
	OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
	OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
	OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
	OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
	OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
	OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
	OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
	OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
	OC_GETLINE|SV|P(0),             /* TC_GETLINE */
	0, 0,                           /* TC_FUNCDECL */
	0,                              /* TC_BEGIN */
	0                               /* TC_END */
};
363
364
365
/* Indexes into intvar[]; the order matches the names stored in vNames[] */
enum {
	CONVFMT, OFMT, FS, OFS,
	ORS, RS, RT, FILENAME,
	SUBSEP, ARGIND, ARGC, ARGV,
	ERRNO, FNR,
	NR, NF, IGNORECASE,
	ENVIRON, F0, NUM_INTERNAL_VARS
};

/* NUL-separated names of the internal variables, in enum order.
 * NOTE(review): some names are preceded by a '*' marker; the code that
 * interprets it is outside this part of the file - confirm its meaning there. */
static const char vNames[] ALIGN1 =
	"CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
	"ORS\0" "RS\0*" "RT\0" "FILENAME\0"
	"SUBSEP\0" "ARGIND\0" "ARGC\0" "ARGV\0"
	"ERRNO\0" "FNR\0"
	"NR\0" "NF\0*" "IGNORECASE\0*"
	"ENVIRON\0" "$\0*" "\0";

/* NUL-separated initial string values for the first variables of the list
 * above, closed by a \377 byte */
static const char vValues[] ALIGN1 =
	"%.6g\0" "%.6g\0" " \0" " \0"
	"\n\0" "\n\0" "\0" "\0"
	"\034\0"
	"\377";
388
389
/* Bucket count of a fresh hash (hash_init) and the successive bucket counts
 * hash_rebuild() switches to as the table grows */
#define FIRST_PRIME 61
static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
392
393
394
395
/* First part of the interpreter state; reached through the G1 macro */
struct globals {
	chain beginseq, mainseq, endseq;    /* BEGIN, main and END program chains */
	chain *seq;                         /* chain that chain_node() currently appends to */
	node *break_ptr, *continue_ptr;     /* jump targets of the loop being parsed */
	rstream *iF;
	xhash *vhash, *ahash, *fdhash, *fnhash;  /* variables, function args, files, functions */
	const char *g_progname;             /* program name used in error messages */
	int g_lineno;                       /* line number used in error messages */
	int nfields;                        /* current number of fields */
	int maxfields;                      /* allocated size of Fields[] */
	var *Fields;
	nvblock *g_cb;                      /* current block of temporary variables */
	char *g_pos;                        /* lexer position in the program text */
	char *g_buf;                        /* scratch buffer (number formatting in getvar_s) */
	smallint icase;                     /* nonzero: use the case-insensitive regex variants */
	smallint exiting;
	smallint nextrec;
	smallint nextfile;
	smallint is_f0_split;
};
/* Second part of the interpreter state; reached through the G macro.
 * Members named func__name hold state private to function 'func'. */
struct globals2 {
	uint32_t t_info;        /* info word of the current token */
	uint32_t t_tclass;      /* class (TC_*) of the current token */
	char *t_string;         /* text of the current name/string/regexp token */
	int t_lineno;           /* line number tracked by the lexer */
	int t_rollback;         /* set by rollback_token(): return the same token again */

	var *intvar[NUM_INTERNAL_VARS];    /* internal variables, indexed by the CONVFMT.. enum */

	/* former static variable of split_f0() */
	char *split_f0__fstrings;

	/* state of next_token() */
	uint32_t next_token__save_tclass;
	uint32_t next_token__save_info;
	uint32_t next_token__ltclass;
	smallint next_token__concat_inserted;

	/* state of next_input_file() */
	smallint next_input_file__files_happen;
	rstream next_input_file__rsm;

	/* state of evaluate() */
	var *evaluate__fnargs;
	unsigned evaluate__seed;
	regex_t evaluate__sreg;

	var ptest__v;

	tsplitter exec_builtin__tspl;

	/* value of the current numeric token, and the two global splitters */
	double t_double;
	tsplitter fsplitter, rsplitter;
};
/*
 * INIT_G() makes one allocation of sizeof(G1) + sizeof(G) bytes and stores a
 * pointer advanced by sizeof(G1) in ptr_to_globals. G1 is therefore the
 * element just before that pointer and G starts at the pointer itself.
 * NOTE(review): assumes ptr_to_globals is declared elsewhere as a pointer to
 * struct globals - confirm.
 */
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)

/* Short names for the members of both structs */
#define beginseq (G1.beginseq )
#define mainseq (G1.mainseq )
#define endseq (G1.endseq )
#define seq (G1.seq )
#define break_ptr (G1.break_ptr )
#define continue_ptr (G1.continue_ptr)
#define iF (G1.iF )
#define vhash (G1.vhash )
#define ahash (G1.ahash )
#define fdhash (G1.fdhash )
#define fnhash (G1.fnhash )
#define g_progname (G1.g_progname )
#define g_lineno (G1.g_lineno )
#define nfields (G1.nfields )
#define maxfields (G1.maxfields )
#define Fields (G1.Fields )
#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
#define g_buf (G1.g_buf )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
#define nextfile (G1.nextfile )
#define is_f0_split (G1.is_f0_split )
#define t_info (G.t_info )
#define t_tclass (G.t_tclass )
#define t_string (G.t_string )
#define t_double (G.t_double )
#define t_lineno (G.t_lineno )
#define t_rollback (G.t_rollback )
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
/* Allocate the zeroed state and set the two members that must start non-zero */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G1) + sizeof(G)) + sizeof(G1)); \
	G.next_token__ltclass = TC_OPTERM; \
	G.evaluate__seed = 1; \
} while (0)
493
494
495
/* Forward declarations of functions that are used before their definition */
static void handle_special(var *);
static node *parse_expr(uint32_t);
static void chain_group(void);
static var *evaluate(node *, var *);
static rstream *next_input_file(void);
static int fmt_num(char *, int, const char *, double, int);
static int awk_exit(int) NORETURN;
503
504
505
/* Error message texts (passed to syntax_error() in this part of the file) */
static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
/* only present when the build has no libm support */
#if !ENABLE_FEATURE_AWK_LIBM
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
#endif
518
519static void zero_out_var(var * vp)
520{
521 memset(vp, 0, sizeof(*vp));
522}
523
524static void syntax_error(const char *const message) NORETURN;
525static void syntax_error(const char *const message)
526{
527 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
528}
529
530
531
/* Compute the hash of a NUL-terminated key: for every character the running
 * value h becomes (character + (h << 6) - h). Callers reduce the result
 * modulo the bucket count themselves. */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = *p + (h << 6) - h;
	return h;
}
539
540
541static xhash *hash_init(void)
542{
543 xhash *newhash;
544
545 newhash = xzalloc(sizeof(xhash));
546 newhash->csize = FIRST_PRIME;
547 newhash->items = xzalloc(newhash->csize * sizeof(hash_item *));
548
549 return newhash;
550}
551
552
553static void *hash_search(xhash *hash, const char *name)
554{
555 hash_item *hi;
556
557 hi = hash->items [ hashidx(name) % hash->csize ];
558 while (hi) {
559 if (strcmp(hi->name, name) == 0)
560 return &(hi->data);
561 hi = hi->next;
562 }
563 return NULL;
564}
565
566
567static void hash_rebuild(xhash *hash)
568{
569 unsigned newsize, i, idx;
570 hash_item **newitems, *hi, *thi;
571
572 if (hash->nprime == ARRAY_SIZE(PRIMES))
573 return;
574
575 newsize = PRIMES[hash->nprime++];
576 newitems = xzalloc(newsize * sizeof(hash_item *));
577
578 for (i = 0; i < hash->csize; i++) {
579 hi = hash->items[i];
580 while (hi) {
581 thi = hi;
582 hi = thi->next;
583 idx = hashidx(thi->name) % newsize;
584 thi->next = newitems[idx];
585 newitems[idx] = thi;
586 }
587 }
588
589 free(hash->items);
590 hash->csize = newsize;
591 hash->items = newitems;
592}
593
594
595static void *hash_find(xhash *hash, const char *name)
596{
597 hash_item *hi;
598 unsigned idx;
599 int l;
600
601 hi = hash_search(hash, name);
602 if (!hi) {
603 if (++hash->nel / hash->csize > 10)
604 hash_rebuild(hash);
605
606 l = strlen(name) + 1;
607 hi = xzalloc(sizeof(hash_item) + l);
608 memcpy(hi->name, name, l);
609
610 idx = hashidx(name) % hash->csize;
611 hi->next = hash->items[idx];
612 hash->items[idx] = hi;
613 hash->glen += l;
614 }
615 return &(hi->data);
616}
617
/* Typed wrappers around hash_find(): look up or create an entry in the given
 * hash (findvar), the variable hash, the file hash or the function hash */
#define findvar(hash, name) ((var*) hash_find((hash), (name)))
#define newvar(name) ((var*) hash_find(vhash, (name)))
#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
#define newfunc(name) ((func*) hash_find(fnhash, (name)))
622
623static void hash_remove(xhash *hash, const char *name)
624{
625 hash_item *hi, **phi;
626
627 phi = &(hash->items[hashidx(name) % hash->csize]);
628 while (*phi) {
629 hi = *phi;
630 if (strcmp(hi->name, name) == 0) {
631 hash->glen -= (strlen(name) + 1);
632 hash->nel--;
633 *phi = hi->next;
634 free(hi);
635 break;
636 }
637 phi = &(hi->next);
638 }
639}
640
641
642
643static void skip_spaces(char **s)
644{
645 char *p = *s;
646
647 while (1) {
648 if (*p == '\\' && p[1] == '\n') {
649 p++;
650 t_lineno++;
651 } else if (*p != ' ' && *p != '\t') {
652 break;
653 }
654 p++;
655 }
656 *s = p;
657}
658
/* Return the string *s points to and move *s just past its terminating NUL,
 * i.e. to the start of the next string in a NUL-separated list. */
static char *nextword(char **s)
{
	char *start = *s;
	char *p = start;

	while (*p != '\0')
		p++;
	*s = p + 1;
	return start;
}
667
/* Fetch the next character from *s and advance *s. A backslash is handed to
 * bb_process_escape_sequence(); if that returns a backslash without moving
 * *s, the character after the backslash is taken as is. */
static char nextchar(char **s)
{
	char *after;
	char c = **s;

	(*s)++;
	after = *s;
	if (c == '\\') {
		c = bb_process_escape_sequence((const char**)s);
		if (c == '\\' && *s == after) {
			c = **s;
			(*s)++;
		}
	}
	return c;
}
678
679static ALWAYS_INLINE int isalnum_(int c)
680{
681 return (isalnum(c) || c == '_');
682}
683
/* Convert the number at *pp and advance *pp past it. With ENABLE_DESKTOP,
 * text starting with "0x"/"0X" or with '0' followed by a digit is converted
 * by strtoull() with base 0; everything else goes through strtod(). */
static double my_strtod(char **pp)
{
#if ENABLE_DESKTOP
	const char *text = *pp;

	if (text[0] == '0') {
		if ((text[1] | 0x20) == 'x' || isdigit(text[1]))
			return strtoull(*pp, pp, 0);
	}
#endif
	return strtod(*pp, pp);
}
695
696
697
698static xhash *iamarray(var *v)
699{
700 var *a = v;
701
702 while (a->type & VF_CHILD)
703 a = a->x.parent;
704
705 if (!(a->type & VF_ARRAY)) {
706 a->type |= VF_ARRAY;
707 a->x.array = hash_init();
708 }
709 return a->x.array;
710}
711
712static void clear_array(xhash *array)
713{
714 unsigned i;
715 hash_item *hi, *thi;
716
717 for (i = 0; i < array->csize; i++) {
718 hi = array->items[i];
719 while (hi) {
720 thi = hi;
721 hi = hi->next;
722 free(thi->data.v.string);
723 free(thi);
724 }
725 array->items[i] = NULL;
726 }
727 array->glen = array->nel = 0;
728}
729
730
731static var *clrvar(var *v)
732{
733 if (!(v->type & VF_FSTR))
734 free(v->string);
735
736 v->type &= VF_DONTTOUCH;
737 v->type |= VF_DIRTY;
738 v->string = NULL;
739 return v;
740}
741
742
743static var *setvar_p(var *v, char *value)
744{
745 clrvar(v);
746 v->string = value;
747 handle_special(v);
748 return v;
749}
750
751
752static var *setvar_s(var *v, const char *value)
753{
754 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
755}
756
757
758static var *setvar_u(var *v, const char *value)
759{
760 setvar_s(v, value);
761 v->type |= VF_USER;
762 return v;
763}
764
765
766static void setari_u(var *a, int idx, const char *s)
767{
768 char sidx[sizeof(int)*3 + 1];
769 var *v;
770
771 sprintf(sidx, "%d", idx);
772 v = findvar(iamarray(a), sidx);
773 setvar_u(v, s);
774}
775
776
777static var *setvar_i(var *v, double value)
778{
779 clrvar(v);
780 v->type |= VF_NUMBER;
781 v->number = value;
782 handle_special(v);
783 return v;
784}
785
786static const char *getvar_s(var *v)
787{
788
789 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
790 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
791 v->string = xstrdup(g_buf);
792 v->type |= VF_CACHED;
793 }
794 return (v->string == NULL) ? "" : v->string;
795}
796
797static double getvar_i(var *v)
798{
799 char *s;
800
801 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
802 v->number = 0;
803 s = v->string;
804 if (s && *s) {
805 v->number = my_strtod(&s);
806 if (v->type & VF_USER) {
807 skip_spaces(&s);
808 if (*s != '\0')
809 v->type &= ~VF_USER;
810 }
811 } else {
812 v->type &= ~VF_USER;
813 }
814 v->type |= VF_CACHED;
815 }
816 return v->number;
817}
818
819
820static unsigned long getvar_i_int(var *v)
821{
822 double d = getvar_i(v);
823
824
825
826 if (d >= 0)
827 return (unsigned long)d;
828
829 return - (long) (unsigned long) (-d);
830}
831
832static var *copyvar(var *dest, const var *src)
833{
834 if (dest != src) {
835 clrvar(dest);
836 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
837 dest->number = src->number;
838 if (src->string)
839 dest->string = xstrdup(src->string);
840 }
841 handle_special(dest);
842 return dest;
843}
844
845static var *incvar(var *v)
846{
847 return setvar_i(v, getvar_i(v) + 1.);
848}
849
850
851static int is_numeric(var *v)
852{
853 getvar_i(v);
854 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
855}
856
857
858static int istrue(var *v)
859{
860 if (is_numeric(v))
861 return (v->number == 0) ? 0 : 1;
862 return (v->string && *(v->string)) ? 1 : 0;
863}
864
865
866static var *nvalloc(int n)
867{
868 nvblock *pb = NULL;
869 var *v, *r;
870 int size;
871
872 while (g_cb) {
873 pb = g_cb;
874 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size) break;
875 g_cb = g_cb->next;
876 }
877
878 if (!g_cb) {
879 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
880 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
881 g_cb->size = size;
882 g_cb->pos = g_cb->nv;
883 g_cb->prev = pb;
884
885 if (pb) pb->next = g_cb;
886 }
887
888 v = r = g_cb->pos;
889 g_cb->pos += n;
890
891 while (v < g_cb->pos) {
892 v->type = 0;
893 v->string = NULL;
894 v++;
895 }
896
897 return r;
898}
899
900static void nvfree(var *v)
901{
902 var *p;
903
904 if (v < g_cb->nv || v >= g_cb->pos)
905 syntax_error(EMSG_INTERNAL_ERROR);
906
907 for (p = v; p < g_cb->pos; p++) {
908 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
909 clear_array(iamarray(p));
910 free(p->x.array->items);
911 free(p->x.array);
912 }
913 if (p->type & VF_WALK)
914 free(p->x.walker);
915
916 clrvar(p);
917 }
918
919 g_cb->pos = v;
920 while (g_cb->prev && g_cb->pos == g_cb->nv) {
921 g_cb = g_cb->prev;
922 }
923}
924
925
926
927
928
929
/*
 * Lexer: fetch the next token from g_pos.
 * 'expected' is the set of acceptable token classes; it selects which
 * tokenlist[] classes are tried and whether '/' starts a regexp. On return
 * t_tclass/t_info (and t_string or t_double where applicable) describe the
 * token. A token outside 'expected' is a syntax error.
 * Returns the token class.
 * String, regexp and name tokens are rewritten in place inside the program
 * buffer, so the program text is modified while it is parsed.
 */
static uint32_t next_token(uint32_t expected)
{
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass (G.next_token__save_tclass)
#define save_info (G.next_token__save_info)
/* class of the previously returned token; starts as TC_OPTERM (see INIT_G) */
#define ltclass (G.next_token__ltclass)

	char *p, *pp, *s;
	const char *tl;
	uint32_t tc;
	const uint32_t *ti;
	int l;

	if (t_rollback) {
		/* rollback_token() was called: hand out the current token again */
		t_rollback = FALSE;

	} else if (concat_inserted) {
		/* the previous call returned an inserted concatenation operator;
		 * now deliver the real token that was put aside */
		concat_inserted = FALSE;
		t_tclass = save_tclass;
		t_info = save_info;

	} else {
		p = g_pos;
 readnext:
		skip_spaces(&p);
		g_lineno = t_lineno;
		/* a comment runs to the end of the line */
		if (*p == '#')
			while (*p != '\n' && *p != '\0')
				p++;

		if (*p == '\n')
			t_lineno++;

		if (*p == '\0') {
			tc = TC_EOF;

		} else if (*p == '\"') {
			/* string literal: unescape it in place */
			t_string = s = ++p;
			while (*p != '\"') {
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*(s++) = nextchar(&p);
			}
			p++;
			*s = '\0';
			tc = TC_STRING;

		} else if ((expected & TC_REGEXP) && *p == '/') {
			/* regexp literal: copy it in place, processing escapes */
			t_string = s = ++p;
			while (*p != '/') {
				if (*p == '\0' || *p == '\n')
					syntax_error(EMSG_UNEXP_EOS);
				*s = *p++;
				if (*s++ == '\\') {
					pp = p;
					*(s-1) = bb_process_escape_sequence((const char **)&p);
					/* an escaped backslash stays doubled */
					if (*pp == '\\')
						*s++ = '\\';
					/* escape not consumed: keep the following char too */
					if (p == pp)
						*s++ = *p++;
				}
			}
			p++;
			*s = '\0';
			tc = TC_REGEXP;

		} else if (*p == '.' || isdigit(*p)) {
			/* numeric literal */
			t_double = my_strtod(&p);
			if (*p == '.')
				syntax_error(EMSG_UNEXP_TOKEN);
			tc = TC_NUMBER;

		} else {
			/* look for a fixed token; tc tracks the class of the
			 * tokenlist[] group, ti the matching tokeninfo[] entry */
			tl = tokenlist;
			tc = 0x00000001;
			ti = tokeninfo;
			while (*tl) {
				l = *(tl++);
				if (l == NTCC) {
					tc <<= 1;
					continue;
				}
				/* accept when the class is wanted (word classes and
				 * newline are always tried), the text matches, and a
				 * word token is not just the prefix of a longer name */
				if ((tc & (expected | TC_WORD | TC_NEWLINE))
				 && *tl == *p && strncmp(p, tl, l) == 0
				 && !((tc & TC_WORD) && isalnum_(p[l]))
				) {
					t_info = *ti;
					p += l;
					break;
				}
				ti++;
				tl += l;
			}

			if (!*tl) {
				/* no fixed token matched: it has to be a name */
				if (!isalnum_(*p))
					syntax_error(EMSG_UNEXP_TOKEN);

				/* shift the name one byte to the left so that there
				 * is room for the terminating NUL */
				t_string = --p;
				while (isalnum_(*(++p))) {
					*(p-1) = *p;
				}
				*(p-1) = '\0';
				tc = TC_VARIABLE;
				/* unless a plain variable is expected, blanks may
				 * separate the name from a following '(' or '[' */
				if (!(expected & TC_VARIABLE))
					skip_spaces(&p);
				if (*p == '(') {
					tc = TC_FUNCTION;
				} else {
					if (*p == '[') {
						p++;
						tc = TC_ARRAY;
					}
				}
			}
		}
		g_pos = p;

		/* a newline after a token that cannot end a statement is skipped */
		if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
			goto readnext;

		/* two adjacent operands: return a concatenation operator now and
		 * keep the real token for the next call */
		if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
			concat_inserted = TRUE;
			save_tclass = tc;
			save_info = t_info;
			tc = TC_BINOP;
			t_info = OC_CONCAT | SS | P(35);
		}

		t_tclass = tc;
	}
	ltclass = t_tclass;

	/* is this token acceptable to the caller? */
	if (!(ltclass & expected))
		syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
				EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);

	return ltclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
#undef ltclass
}
1089
1090static void rollback_token(void)
1091{
1092 t_rollback = TRUE;
1093}
1094
1095static node *new_node(uint32_t info)
1096{
1097 node *n;
1098
1099 n = xzalloc(sizeof(node));
1100 n->info = info;
1101 n->lineno = g_lineno;
1102 return n;
1103}
1104
1105static node *mk_re_node(const char *s, node *n, regex_t *re)
1106{
1107 n->info = OC_REGEXP;
1108 n->l.re = re;
1109 n->r.ire = re + 1;
1110 xregcomp(re, s, REG_EXTENDED);
1111 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1112
1113 return n;
1114}
1115
1116static node *condition(void)
1117{
1118 next_token(TC_SEQSTART);
1119 return parse_expr(TC_SEQTERM);
1120}
1121
1122
1123
/*
 * Parse an expression and build its tree. Parsing stops at the first token
 * whose class is in 'iexp' (that token is consumed and left in t_tclass).
 * Returns the root of the tree, or NULL for an empty expression.
 * While the tree is being built, node.a.n links a node to its parent.
 */
static node *parse_expr(uint32_t iexp)
{
	node sn;            /* dummy root; the real tree hangs off sn.r.n */
	node *cn = &sn;     /* node added most recently */
	node *vn, *glptr;   /* glptr: pending plain getline node, if any */
	uint32_t tc, xtc;   /* xtc: token classes acceptable next */
	var *v;

	sn.info = PRIMASK;
	sn.r.n = glptr = NULL;
	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;

	while (!((tc = next_token(xtc)) & iexp)) {
		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
			/* '<' right after getline: input redirection attached to
			 * the getline node */
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			xtc = TC_OPERAND | TC_UOPPRE;
			glptr = NULL;

		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
			/* binary or postfix operator: climb towards the root while
			 * the new operator's priority value is greater than the
			 * parent's (repeated ':' operators also keep climbing) */
			vn = cn;
			while ( ((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON)) )
				vn = vn->a.n;
			if ((t_info & OPCLSMASK) == OC_TERNARY)
				t_info += P(6);
			/* the new operator takes vn's place under vn's parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TC_BINOP) {
				cn->l.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
				if ((t_info & OPCLSMASK) == OC_PGETLINE) {
					/* pipe: must be followed by getline */
					next_token(TC_GETLINE);
					/* clear the priority bits of the pipe node */
					cn->info &= ~PRIMASK;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
				}
			} else {
				cn->r.n = vn;
				xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
			}
			vn->a.n = cn;

		} else {
			/* operand or prefix operator: it becomes the right child
			 * of the current node */
			vn = cn;
			cn = vn->r.n = new_node(t_info);
			cn->a.n = vn;
			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
			if (tc & (TC_OPERAND | TC_REGEXP)) {
				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
				/* one should be very careful with switch on tclass -
				 * only simple (single-bit) classes may be tested */
				switch (tc) {
				case TC_VARIABLE:
				case TC_ARRAY:
					cn->info = OC_VAR;
					/* a name found in ahash is a function argument */
					v = hash_search(ahash, t_string);
					if (v != NULL) {
						cn->info = OC_FNARG;
						cn->l.i = v->x.aidx;
					} else {
						cn->l.v = newvar(t_string);
					}
					if (tc & TC_ARRAY) {
						/* the subscript expression runs up to ']' */
						cn->info |= xS;
						cn->r.n = parse_expr(TC_ARRTERM);
					}
					break;

				case TC_NUMBER:
				case TC_STRING:
					/* constant: stored in a var owned by the node */
					cn->info = OC_VAR;
					v = cn->l.v = xzalloc(sizeof(var));
					if (tc & TC_NUMBER)
						setvar_i(v, t_double);
					else
						setvar_s(v, t_string);
					break;

				case TC_REGEXP:
					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
					break;

				case TC_FUNCTION:
					cn->info = OC_FUNC;
					cn->r.f = newfunc(t_string);
					cn->l.n = condition();
					break;

				case TC_SEQSTART:
					/* parenthesized sub-expression replaces the node */
					cn = vn->r.n = parse_expr(TC_SEQTERM);
					cn->a.n = vn;
					break;

				case TC_GETLINE:
					glptr = cn;
					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
					break;

				case TC_BUILTIN:
					cn->l.n = condition();
					break;
				}
			}
		}
	}
	return sn.r.n;
}
1238
1239
1240static node *chain_node(uint32_t info)
1241{
1242 node *n;
1243
1244 if (!seq->first)
1245 seq->first = seq->last = new_node(0);
1246
1247 if (seq->programname != g_progname) {
1248 seq->programname = g_progname;
1249 n = chain_node(OC_NEWSOURCE);
1250 n->l.s = xstrdup(g_progname);
1251 }
1252
1253 n = seq->last;
1254 n->info = info;
1255 seq->last = n->a.n = new_node(OC_DONE);
1256
1257 return n;
1258}
1259
1260static void chain_expr(uint32_t info)
1261{
1262 node *n;
1263
1264 n = chain_node(info);
1265 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1266 if (t_tclass & TC_GRPTERM)
1267 rollback_token();
1268}
1269
1270static node *chain_loop(node *nn)
1271{
1272 node *n, *n2, *save_brk, *save_cont;
1273
1274 save_brk = break_ptr;
1275 save_cont = continue_ptr;
1276
1277 n = chain_node(OC_BR | Vx);
1278 continue_ptr = new_node(OC_EXEC);
1279 break_ptr = new_node(OC_EXEC);
1280 chain_group();
1281 n2 = chain_node(OC_EXEC | Vx);
1282 n2->l.n = nn;
1283 n2->a.n = n;
1284 continue_ptr->a.n = n2;
1285 break_ptr->a.n = n->r.n = seq->last;
1286
1287 continue_ptr = save_cont;
1288 break_ptr = save_brk;
1289
1290 return n;
1291}
1292
1293
1294static void chain_group(void)
1295{
1296 uint32_t c;
1297 node *n, *n2, *n3;
1298
1299 do {
1300 c = next_token(TC_GRPSEQ);
1301 } while (c & TC_NEWLINE);
1302
1303 if (c & TC_GRPSTART) {
1304 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1305 if (t_tclass & TC_NEWLINE) continue;
1306 rollback_token();
1307 chain_group();
1308 }
1309 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1310 rollback_token();
1311 chain_expr(OC_EXEC | Vx);
1312 } else {
1313 switch (t_info & OPCLSMASK) {
1314 case ST_IF:
1315 n = chain_node(OC_BR | Vx);
1316 n->l.n = condition();
1317 chain_group();
1318 n2 = chain_node(OC_EXEC);
1319 n->r.n = seq->last;
1320 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1321 chain_group();
1322 n2->a.n = seq->last;
1323 } else {
1324 rollback_token();
1325 }
1326 break;
1327
1328 case ST_WHILE:
1329 n2 = condition();
1330 n = chain_loop(NULL);
1331 n->l.n = n2;
1332 break;
1333
1334 case ST_DO:
1335 n2 = chain_node(OC_EXEC);
1336 n = chain_loop(NULL);
1337 n2->a.n = n->a.n;
1338 next_token(TC_WHILE);
1339 n->l.n = condition();
1340 break;
1341
1342 case ST_FOR:
1343 next_token(TC_SEQSTART);
1344 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1345 if (t_tclass & TC_SEQTERM) {
1346 if ((n2->info & OPCLSMASK) != OC_IN)
1347 syntax_error(EMSG_UNEXP_TOKEN);
1348 n = chain_node(OC_WALKINIT | VV);
1349 n->l.n = n2->l.n;
1350 n->r.n = n2->r.n;
1351 n = chain_loop(NULL);
1352 n->info = OC_WALKNEXT | Vx;
1353 n->l.n = n2->l.n;
1354 } else {
1355 n = chain_node(OC_EXEC | Vx);
1356 n->l.n = n2;
1357 n2 = parse_expr(TC_SEMICOL);
1358 n3 = parse_expr(TC_SEQTERM);
1359 n = chain_loop(n3);
1360 n->l.n = n2;
1361 if (!n2)
1362 n->info = OC_EXEC;
1363 }
1364 break;
1365
1366 case OC_PRINT:
1367 case OC_PRINTF:
1368 n = chain_node(t_info);
1369 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1370 if (t_tclass & TC_OUTRDR) {
1371 n->info |= t_info;
1372 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1373 }
1374 if (t_tclass & TC_GRPTERM)
1375 rollback_token();
1376 break;
1377
1378 case OC_BREAK:
1379 n = chain_node(OC_EXEC);
1380 n->a.n = break_ptr;
1381 break;
1382
1383 case OC_CONTINUE:
1384 n = chain_node(OC_EXEC);
1385 n->a.n = continue_ptr;
1386 break;
1387
1388
1389 default:
1390 chain_expr(t_info);
1391 }
1392 }
1393}
1394
/*
 * Parse the program text at p (the buffer is modified by the lexer) and build
 * the BEGIN, main and END chains and the bodies of user-defined functions.
 */
static void parse_program(char *p)
{
	uint32_t tclass;
	node *cn;
	func *f;
	var *v;

	g_pos = p;
	t_lineno = 1;
	while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
			TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {

		/* empty statements between rules */
		if (tclass & TC_OPTERM)
			continue;

		seq = &mainseq;
		if (tclass & TC_BEGIN) {
			seq = &beginseq;
			chain_group();

		} else if (tclass & TC_END) {
			seq = &endseq;
			chain_group();

		} else if (tclass & TC_FUNCDECL) {
			next_token(TC_FUNCTION);
			/* next_token() stopped at the '(' after the name: step over it */
			g_pos++;
			f = newfunc(t_string);
			f->body.first = NULL;
			f->nargs = 0;
			/* parameters go into ahash with their position as index */
			while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
				v = findvar(ahash, t_string);
				v->x.aidx = (f->nargs)++;

				if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
					break;
			}
			seq = &(f->body);
			chain_group();
			/* parameter names are only known inside the function body */
			clear_array(ahash);

		} else if (tclass & TC_OPSEQ) {
			/* pattern, optionally followed by an action */
			rollback_token();
			cn = chain_node(OC_TEST);
			cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
			if (t_tclass & TC_GRPSTART) {
				rollback_token();
				chain_group();
			} else {
				/* pattern without action: chain a plain print */
				chain_node(OC_PRINT);
			}
			/* r.n points past the action */
			cn->r.n = mainseq.last;

		} else /* if (tclass & TC_GRPSTART) */ {
			/* action without pattern */
			rollback_token();
			chain_group();
		}
	}
}
1454
1455
1456
1457
1458static node *mk_splitter(const char *s, tsplitter *spl)
1459{
1460 regex_t *re, *ire;
1461 node *n;
1462
1463 re = &spl->re[0];
1464 ire = &spl->re[1];
1465 n = &spl->n;
1466 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1467 regfree(re);
1468 regfree(ire);
1469 }
1470 if (strlen(s) > 1) {
1471 mk_re_node(s, n, re);
1472 } else {
1473 n->info = (uint32_t) *s;
1474 }
1475
1476 return n;
1477}
1478
1479
1480
1481
1482
1483static regex_t *as_regex(node *op, regex_t *preg)
1484{
1485 var *v;
1486 const char *s;
1487
1488 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1489 return icase ? op->r.ire : op->l.re;
1490 }
1491 v = nvalloc(1);
1492 s = getvar_s(evaluate(op, v));
1493 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1494 nvfree(v);
1495 return preg;
1496}
1497
1498
/*
 * Make sure buffer *b can hold more than n bytes.
 *
 * When *b is NULL or n is not below the recorded *size, the buffer is
 * reallocated to n + n/2 + 80 bytes and *size is updated.  *size is not
 * read when *b is NULL, so it may be uninitialised on the first call.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1506
1507
1508static void fsrealloc(int size)
1509{
1510 int i;
1511
1512 if (size >= maxfields) {
1513 i = maxfields;
1514 maxfields = size + 16;
1515 Fields = xrealloc(Fields, maxfields * sizeof(var));
1516 for (; i < maxfields; i++) {
1517 Fields[i].type = VF_SPECIAL;
1518 Fields[i].string = NULL;
1519 }
1520 }
1521
1522 if (size < nfields) {
1523 for (i = size; i < nfields; i++) {
1524 clrvar(Fields + i);
1525 }
1526 }
1527 nfields = size;
1528}
1529
1530static int awk_split(const char *s, node *spl, char **slist)
1531{
1532 int l, n = 0;
1533 char c[4];
1534 char *s1;
1535 regmatch_t pmatch[2];
1536
1537
1538 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1539 strcpy(s1, s);
1540
1541 c[0] = c[1] = (char)spl->info;
1542 c[2] = c[3] = '\0';
1543 if (*getvar_s(intvar[RS]) == '\0')
1544 c[2] = '\n';
1545
1546 if ((spl->info & OPCLSMASK) == OC_REGEXP) {
1547 if (!*s)
1548 return n;
1549 n++;
1550 do {
1551 l = strcspn(s, c+2);
1552 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1553 && pmatch[0].rm_so <= l
1554 ) {
1555 l = pmatch[0].rm_so;
1556 if (pmatch[0].rm_eo == 0) {
1557 l++;
1558 pmatch[0].rm_eo++;
1559 }
1560 n++;
1561 } else {
1562 pmatch[0].rm_eo = l;
1563 if (s[l]) pmatch[0].rm_eo++;
1564 }
1565 memcpy(s1, s, l);
1566 s1[l] = '\0';
1567 nextword(&s1);
1568 s += pmatch[0].rm_eo;
1569 } while (*s);
1570 return n;
1571 }
1572 if (c[0] == '\0') {
1573 while (*s) {
1574 *s1++ = *s++;
1575 *s1++ = '\0';
1576 n++;
1577 }
1578 return n;
1579 }
1580 if (c[0] != ' ') {
1581 if (icase) {
1582 c[0] = toupper(c[0]);
1583 c[1] = tolower(c[1]);
1584 }
1585 if (*s1) n++;
1586 while ((s1 = strpbrk(s1, c))) {
1587 *s1++ = '\0';
1588 n++;
1589 }
1590 return n;
1591 }
1592
1593 while (*s) {
1594 s = skip_whitespace(s);
1595 if (!*s) break;
1596 n++;
1597 while (*s && !isspace(*s))
1598 *s1++ = *s++;
1599 *s1++ = '\0';
1600 }
1601 return n;
1602}
1603
1604static void split_f0(void)
1605{
1606
1607#define fstrings (G.split_f0__fstrings)
1608
1609 int i, n;
1610 char *s;
1611
1612 if (is_f0_split)
1613 return;
1614
1615 is_f0_split = TRUE;
1616 free(fstrings);
1617 fsrealloc(0);
1618 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1619 fsrealloc(n);
1620 s = fstrings;
1621 for (i = 0; i < n; i++) {
1622 Fields[i].string = nextword(&s);
1623 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1624 }
1625
1626
1627 clrvar(intvar[NF]);
1628 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1629 intvar[NF]->number = nfields;
1630#undef fstrings
1631}
1632
1633
1634static void handle_special(var *v)
1635{
1636 int n;
1637 char *b;
1638 const char *sep, *s;
1639 int sl, l, len, i, bsize;
1640
1641 if (!(v->type & VF_SPECIAL))
1642 return;
1643
1644 if (v == intvar[NF]) {
1645 n = (int)getvar_i(v);
1646 fsrealloc(n);
1647
1648
1649 sep = getvar_s(intvar[OFS]);
1650 sl = strlen(sep);
1651 b = NULL;
1652 len = 0;
1653 for (i = 0; i < n; i++) {
1654 s = getvar_s(&Fields[i]);
1655 l = strlen(s);
1656 if (b) {
1657 memcpy(b+len, sep, sl);
1658 len += sl;
1659 }
1660 qrealloc(&b, len+l+sl, &bsize);
1661 memcpy(b+len, s, l);
1662 len += l;
1663 }
1664 if (b)
1665 b[len] = '\0';
1666 setvar_p(intvar[F0], b);
1667 is_f0_split = TRUE;
1668
1669 } else if (v == intvar[F0]) {
1670 is_f0_split = FALSE;
1671
1672 } else if (v == intvar[FS]) {
1673 mk_splitter(getvar_s(v), &fsplitter);
1674
1675 } else if (v == intvar[RS]) {
1676 mk_splitter(getvar_s(v), &rsplitter);
1677
1678 } else if (v == intvar[IGNORECASE]) {
1679 icase = istrue(v);
1680
1681 } else {
1682 n = getvar_i(intvar[NF]);
1683 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1684
1685 }
1686}
1687
1688
1689static node *nextarg(node **pn)
1690{
1691 node *n;
1692
1693 n = *pn;
1694 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1695 *pn = n->r.n;
1696 n = n->l.n;
1697 } else {
1698 *pn = NULL;
1699 }
1700 return n;
1701}
1702
1703static void hashwalk_init(var *v, xhash *array)
1704{
1705 char **w;
1706 hash_item *hi;
1707 unsigned i;
1708
1709 if (v->type & VF_WALK)
1710 free(v->x.walker);
1711
1712 v->type |= VF_WALK;
1713 w = v->x.walker = xzalloc(2 + 2*sizeof(char *) + array->glen);
1714 w[0] = w[1] = (char *)(w + 2);
1715 for (i = 0; i < array->csize; i++) {
1716 hi = array->items[i];
1717 while (hi) {
1718 strcpy(*w, hi->name);
1719 nextword(w);
1720 hi = hi->next;
1721 }
1722 }
1723}
1724
1725static int hashwalk_next(var *v)
1726{
1727 char **w;
1728
1729 w = v->x.walker;
1730 if (w[1] == w[0])
1731 return FALSE;
1732
1733 setvar_s(v, nextword(w+1));
1734 return TRUE;
1735}
1736
1737
1738static int ptest(node *pattern)
1739{
1740
1741 return istrue(evaluate(pattern, &G.ptest__v));
1742}
1743
1744
1745static int awk_getline(rstream *rsm, var *v)
1746{
1747 char *b;
1748 regmatch_t pmatch[2];
1749 int a, p, pp=0, size;
1750 int fd, so, eo, r, rp;
1751 char c, *m, *s;
1752
1753
1754
1755
1756 fd = fileno(rsm->F);
1757 m = rsm->buffer;
1758 a = rsm->adv;
1759 p = rsm->pos;
1760 size = rsm->size;
1761 c = (char) rsplitter.n.info;
1762 rp = 0;
1763
1764 if (!m) qrealloc(&m, 256, &size);
1765 do {
1766 b = m + a;
1767 so = eo = p;
1768 r = 1;
1769 if (p > 0) {
1770 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1771 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1772 b, 1, pmatch, 0) == 0) {
1773 so = pmatch[0].rm_so;
1774 eo = pmatch[0].rm_eo;
1775 if (b[eo] != '\0')
1776 break;
1777 }
1778 } else if (c != '\0') {
1779 s = strchr(b+pp, c);
1780 if (!s) s = memchr(b+pp, '\0', p - pp);
1781 if (s) {
1782 so = eo = s-b;
1783 eo++;
1784 break;
1785 }
1786 } else {
1787 while (b[rp] == '\n')
1788 rp++;
1789 s = strstr(b+rp, "\n\n");
1790 if (s) {
1791 so = eo = s-b;
1792 while (b[eo] == '\n') eo++;
1793 if (b[eo] != '\0')
1794 break;
1795 }
1796 }
1797 }
1798
1799 if (a > 0) {
1800 memmove(m, (const void *)(m+a), p+1);
1801 b = m;
1802 a = 0;
1803 }
1804
1805 qrealloc(&m, a+p+128, &size);
1806 b = m + a;
1807 pp = p;
1808 p += safe_read(fd, b+p, size-p-1);
1809 if (p < pp) {
1810 p = 0;
1811 r = 0;
1812 setvar_i(intvar[ERRNO], errno);
1813 }
1814 b[p] = '\0';
1815
1816 } while (p > pp);
1817
1818 if (p == 0) {
1819 r--;
1820 } else {
1821 c = b[so]; b[so] = '\0';
1822 setvar_s(v, b+rp);
1823 v->type |= VF_USER;
1824 b[so] = c;
1825 c = b[eo]; b[eo] = '\0';
1826 setvar_s(intvar[RT], b+so);
1827 b[eo] = c;
1828 }
1829
1830 rsm->buffer = m;
1831 rsm->adv = a + eo;
1832 rsm->pos = p - eo;
1833 rsm->size = size;
1834
1835 return r;
1836}
1837
1838static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
1839{
1840 int r = 0;
1841 char c;
1842 const char *s = format;
1843
1844 if (int_as_int && n == (int)n) {
1845 r = snprintf(b, size, "%d", (int)n);
1846 } else {
1847 do { c = *s; } while (c && *++s);
1848 if (strchr("diouxX", c)) {
1849 r = snprintf(b, size, format, (int)n);
1850 } else if (strchr("eEfgG", c)) {
1851 r = snprintf(b, size, format, n);
1852 } else {
1853 syntax_error(EMSG_INV_FMT);
1854 }
1855 }
1856 return r;
1857}
1858
1859
1860
1861static char *awk_printf(node *n)
1862{
1863 char *b = NULL;
1864 char *fmt, *s, *f;
1865 const char *s1;
1866 int i, j, incr, bsize;
1867 char c, c1;
1868 var *v, *arg;
1869
1870 v = nvalloc(1);
1871 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
1872
1873 i = 0;
1874 while (*f) {
1875 s = f;
1876 while (*f && (*f != '%' || *(++f) == '%'))
1877 f++;
1878 while (*f && !isalpha(*f)) {
1879 if (*f == '*')
1880 syntax_error("%*x formats are not supported");
1881 f++;
1882 }
1883
1884 incr = (f - s) + MAXVARFMT;
1885 qrealloc(&b, incr + i, &bsize);
1886 c = *f;
1887 if (c != '\0') f++;
1888 c1 = *f;
1889 *f = '\0';
1890 arg = evaluate(nextarg(&n), v);
1891
1892 j = i;
1893 if (c == 'c' || !c) {
1894 i += sprintf(b+i, s, is_numeric(arg) ?
1895 (char)getvar_i(arg) : *getvar_s(arg));
1896 } else if (c == 's') {
1897 s1 = getvar_s(arg);
1898 qrealloc(&b, incr+i+strlen(s1), &bsize);
1899 i += sprintf(b+i, s, s1);
1900 } else {
1901 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1902 }
1903 *f = c1;
1904
1905
1906 if (i < j) i = j;
1907 }
1908
1909 b = xrealloc(b, i + 1);
1910 free(fmt);
1911 nvfree(v);
1912 b[i] = '\0';
1913 return b;
1914}
1915
1916
1917
1918
1919
1920
1921
1922static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex)
1923{
1924 char *ds = NULL;
1925 const char *s;
1926 const char *sp;
1927 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1928 regmatch_t pmatch[10];
1929 regex_t sreg, *re;
1930
1931 re = as_regex(rn, &sreg);
1932 if (!src) src = intvar[F0];
1933 if (!dest) dest = intvar[F0];
1934
1935 i = di = 0;
1936 sp = getvar_s(src);
1937 rl = strlen(repl);
1938 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) {
1939 so = pmatch[0].rm_so;
1940 eo = pmatch[0].rm_eo;
1941
1942 qrealloc(&ds, di + eo + rl, &dssize);
1943 memcpy(ds + di, sp, eo);
1944 di += eo;
1945 if (++i >= nm) {
1946
1947 di -= (eo - so);
1948 nbs = 0;
1949 for (s = repl; *s; s++) {
1950 ds[di++] = c = *s;
1951 if (c == '\\') {
1952 nbs++;
1953 continue;
1954 }
1955 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1956 di -= ((nbs + 3) >> 1);
1957 j = 0;
1958 if (c != '&') {
1959 j = c - '0';
1960 nbs++;
1961 }
1962 if (nbs % 2) {
1963 ds[di++] = c;
1964 } else {
1965 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1966 qrealloc(&ds, di + rl + n, &dssize);
1967 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1968 di += n;
1969 }
1970 }
1971 nbs = 0;
1972 }
1973 }
1974
1975 sp += eo;
1976 if (i == nm) break;
1977 if (eo == so) {
1978 ds[di] = *sp++;
1979 if (!ds[di++]) break;
1980 }
1981 }
1982
1983 qrealloc(&ds, di + strlen(sp), &dssize);
1984 strcpy(ds + di, sp);
1985 setvar_p(dest, ds);
1986 if (re == &sreg) regfree(re);
1987 return i;
1988}
1989
1990static var *exec_builtin(node *op, var *res)
1991{
1992#define tspl (G.exec_builtin__tspl)
1993
1994 int (*to_xxx)(int);
1995 var *tv;
1996 node *an[4];
1997 var *av[4];
1998 const char *as[4];
1999 regmatch_t pmatch[2];
2000 regex_t sreg, *re;
2001 node *spl;
2002 uint32_t isr, info;
2003 int nargs;
2004 time_t tt;
2005 char *s, *s1;
2006 int i, l, ll, n;
2007
2008 tv = nvalloc(4);
2009 isr = info = op->info;
2010 op = op->l.n;
2011
2012 av[2] = av[3] = NULL;
2013 for (i = 0; i < 4 && op; i++) {
2014 an[i] = nextarg(&op);
2015 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
2016 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
2017 isr >>= 1;
2018 }
2019
2020 nargs = i;
2021 if ((uint32_t)nargs < (info >> 30))
2022 syntax_error(EMSG_TOO_FEW_ARGS);
2023
2024 switch (info & OPNMASK) {
2025
2026 case B_a2:
2027#if ENABLE_FEATURE_AWK_LIBM
2028 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2029#else
2030 syntax_error(EMSG_NO_MATH);
2031#endif
2032 break;
2033
2034 case B_sp:
2035 if (nargs > 2) {
2036 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2037 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2038 } else {
2039 spl = &fsplitter.n;
2040 }
2041
2042 n = awk_split(as[0], spl, &s);
2043 s1 = s;
2044 clear_array(iamarray(av[1]));
2045 for (i = 1; i <= n; i++)
2046 setari_u(av[1], i, nextword(&s1));
2047 free(s);
2048 setvar_i(res, n);
2049 break;
2050
2051 case B_ss:
2052 l = strlen(as[0]);
2053 i = getvar_i(av[1]) - 1;
2054 if (i > l) i = l;
2055 if (i < 0) i = 0;
2056 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2057 if (n < 0) n = 0;
2058 s = xstrndup(as[0]+i, n);
2059 setvar_p(res, s);
2060 break;
2061
2062
2063
2064 case B_an:
2065 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2066 break;
2067
2068 case B_co:
2069 setvar_i(res, ~getvar_i_int(av[0]));
2070 break;
2071
2072 case B_ls:
2073 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2074 break;
2075
2076 case B_or:
2077 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2078 break;
2079
2080 case B_rs:
2081 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2082 break;
2083
2084 case B_xo:
2085 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2086 break;
2087
2088 case B_lo:
2089 to_xxx = tolower;
2090 goto lo_cont;
2091
2092 case B_up:
2093 to_xxx = toupper;
2094 lo_cont:
2095 s1 = s = xstrdup(as[0]);
2096 while (*s1) {
2097 *s1 = (*to_xxx)(*s1);
2098 s1++;
2099 }
2100 setvar_p(res, s);
2101 break;
2102
2103 case B_ix:
2104 n = 0;
2105 ll = strlen(as[1]);
2106 l = strlen(as[0]) - ll;
2107 if (ll > 0 && l >= 0) {
2108 if (!icase) {
2109 s = strstr(as[0], as[1]);
2110 if (s) n = (s - as[0]) + 1;
2111 } else {
2112
2113
2114
2115 for (i=0; i<=l; i++) {
2116 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2117 n = i+1;
2118 break;
2119 }
2120 }
2121 }
2122 }
2123 setvar_i(res, n);
2124 break;
2125
2126 case B_ti:
2127 if (nargs > 1)
2128 tt = getvar_i(av[1]);
2129 else
2130 time(&tt);
2131
2132 i = strftime(g_buf, MAXVARFMT,
2133 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2134 localtime(&tt));
2135 g_buf[i] = '\0';
2136 setvar_s(res, g_buf);
2137 break;
2138
2139 case B_ma:
2140 re = as_regex(an[1], &sreg);
2141 n = regexec(re, as[0], 1, pmatch, 0);
2142 if (n == 0) {
2143 pmatch[0].rm_so++;
2144 pmatch[0].rm_eo++;
2145 } else {
2146 pmatch[0].rm_so = 0;
2147 pmatch[0].rm_eo = -1;
2148 }
2149 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2150 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2151 setvar_i(res, pmatch[0].rm_so);
2152 if (re == &sreg) regfree(re);
2153 break;
2154
2155 case B_ge:
2156 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2157 break;
2158
2159 case B_gs:
2160 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2161 break;
2162
2163 case B_su:
2164 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2165 break;
2166 }
2167
2168 nvfree(tv);
2169 return res;
2170#undef tspl
2171}
2172
2173
2174
2175
2176
2177#define XC(n) ((n) >> 8)
2178
2179static var *evaluate(node *op, var *res)
2180{
2181
2182#define fnargs (G.evaluate__fnargs)
2183
2184#define seed (G.evaluate__seed)
2185#define sreg (G.evaluate__sreg)
2186
2187 node *op1;
2188 var *v1;
2189 union {
2190 var *v;
2191 const char *s;
2192 double d;
2193 int i;
2194 } L, R;
2195 uint32_t opinfo;
2196 int opn;
2197 union {
2198 char *s;
2199 rstream *rsm;
2200 FILE *F;
2201 var *v;
2202 regex_t *re;
2203 uint32_t info;
2204 } X;
2205
2206 if (!op)
2207 return setvar_s(res, NULL);
2208
2209 v1 = nvalloc(2);
2210
2211 while (op) {
2212 opinfo = op->info;
2213 opn = (opinfo & OPNMASK);
2214 g_lineno = op->lineno;
2215
2216
2217 op1 = op->l.n;
2218 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2219 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2220 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2221 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2222 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2223
2224 switch (XC(opinfo & OPCLSMASK)) {
2225
2226
2227
2228
2229 case XC( OC_TEST ):
2230 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2231
2232 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2233 op->info |= OF_CHECKED;
2234 if (ptest(op1->r.n))
2235 op->info &= ~OF_CHECKED;
2236
2237 op = op->a.n;
2238 } else {
2239 op = op->r.n;
2240 }
2241 } else {
2242 op = (ptest(op1)) ? op->a.n : op->r.n;
2243 }
2244 break;
2245
2246
2247 case XC( OC_EXEC ):
2248 break;
2249
2250
2251 case XC( OC_BR ):
2252 op = istrue(L.v) ? op->a.n : op->r.n;
2253 break;
2254
2255
2256 case XC( OC_WALKINIT ):
2257 hashwalk_init(L.v, iamarray(R.v));
2258 break;
2259
2260
2261 case XC( OC_WALKNEXT ):
2262 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2263 break;
2264
2265 case XC( OC_PRINT ):
2266 case XC( OC_PRINTF ):
2267 X.F = stdout;
2268 if (op->r.n) {
2269 X.rsm = newfile(R.s);
2270 if (!X.rsm->F) {
2271 if (opn == '|') {
2272 X.rsm->F = popen(R.s, "w");
2273 if (X.rsm->F == NULL)
2274 bb_perror_msg_and_die("popen");
2275 X.rsm->is_pipe = 1;
2276 } else {
2277 X.rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2278 }
2279 }
2280 X.F = X.rsm->F;
2281 }
2282
2283 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2284 if (!op1) {
2285 fputs(getvar_s(intvar[F0]), X.F);
2286 } else {
2287 while (op1) {
2288 L.v = evaluate(nextarg(&op1), v1);
2289 if (L.v->type & VF_NUMBER) {
2290 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2291 getvar_i(L.v), TRUE);
2292 fputs(g_buf, X.F);
2293 } else {
2294 fputs(getvar_s(L.v), X.F);
2295 }
2296
2297 if (op1) fputs(getvar_s(intvar[OFS]), X.F);
2298 }
2299 }
2300 fputs(getvar_s(intvar[ORS]), X.F);
2301
2302 } else {
2303 L.s = awk_printf(op1);
2304 fputs(L.s, X.F);
2305 free((char*)L.s);
2306 }
2307 fflush(X.F);
2308 break;
2309
2310 case XC( OC_DELETE ):
2311 X.info = op1->info & OPCLSMASK;
2312 if (X.info == OC_VAR) {
2313 R.v = op1->l.v;
2314 } else if (X.info == OC_FNARG) {
2315 R.v = &fnargs[op1->l.i];
2316 } else {
2317 syntax_error(EMSG_NOT_ARRAY);
2318 }
2319
2320 if (op1->r.n) {
2321 clrvar(L.v);
2322 L.s = getvar_s(evaluate(op1->r.n, v1));
2323 hash_remove(iamarray(R.v), L.s);
2324 } else {
2325 clear_array(iamarray(R.v));
2326 }
2327 break;
2328
2329 case XC( OC_NEWSOURCE ):
2330 g_progname = op->l.s;
2331 break;
2332
2333 case XC( OC_RETURN ):
2334 copyvar(res, L.v);
2335 break;
2336
2337 case XC( OC_NEXTFILE ):
2338 nextfile = TRUE;
2339 case XC( OC_NEXT ):
2340 nextrec = TRUE;
2341 case XC( OC_DONE ):
2342 clrvar(res);
2343 break;
2344
2345 case XC( OC_EXIT ):
2346 awk_exit(L.d);
2347
2348
2349
2350 case XC( OC_VAR ):
2351 L.v = op->l.v;
2352 if (L.v == intvar[NF])
2353 split_f0();
2354 goto v_cont;
2355
2356 case XC( OC_FNARG ):
2357 L.v = &fnargs[op->l.i];
2358 v_cont:
2359 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2360 break;
2361
2362 case XC( OC_IN ):
2363 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2364 break;
2365
2366 case XC( OC_REGEXP ):
2367 op1 = op;
2368 L.s = getvar_s(intvar[F0]);
2369 goto re_cont;
2370
2371 case XC( OC_MATCH ):
2372 op1 = op->r.n;
2373 re_cont:
2374 X.re = as_regex(op1, &sreg);
2375 R.i = regexec(X.re, L.s, 0, NULL, 0);
2376 if (X.re == &sreg) regfree(X.re);
2377 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2378 break;
2379
2380 case XC( OC_MOVE ):
2381
2382 if (R.v == v1+1 && R.v->string) {
2383 res = setvar_p(L.v, R.v->string);
2384 R.v->string = NULL;
2385 } else {
2386 res = copyvar(L.v, R.v);
2387 }
2388 break;
2389
2390 case XC( OC_TERNARY ):
2391 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2392 syntax_error(EMSG_POSSIBLE_ERROR);
2393 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2394 break;
2395
2396 case XC( OC_FUNC ):
2397 if (!op->r.f->body.first)
2398 syntax_error(EMSG_UNDEF_FUNC);
2399
2400 X.v = R.v = nvalloc(op->r.f->nargs+1);
2401 while (op1) {
2402 L.v = evaluate(nextarg(&op1), v1);
2403 copyvar(R.v, L.v);
2404 R.v->type |= VF_CHILD;
2405 R.v->x.parent = L.v;
2406 if (++R.v - X.v >= op->r.f->nargs)
2407 break;
2408 }
2409
2410 R.v = fnargs;
2411 fnargs = X.v;
2412
2413 L.s = g_progname;
2414 res = evaluate(op->r.f->body.first, res);
2415 g_progname = L.s;
2416
2417 nvfree(fnargs);
2418 fnargs = R.v;
2419 break;
2420
2421 case XC( OC_GETLINE ):
2422 case XC( OC_PGETLINE ):
2423 if (op1) {
2424 X.rsm = newfile(L.s);
2425 if (!X.rsm->F) {
2426 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2427 X.rsm->F = popen(L.s, "r");
2428 X.rsm->is_pipe = TRUE;
2429 } else {
2430 X.rsm->F = fopen_for_read(L.s);
2431 }
2432 }
2433 } else {
2434 if (!iF) iF = next_input_file();
2435 X.rsm = iF;
2436 }
2437
2438 if (!X.rsm->F) {
2439 setvar_i(intvar[ERRNO], errno);
2440 setvar_i(res, -1);
2441 break;
2442 }
2443
2444 if (!op->r.n)
2445 R.v = intvar[F0];
2446
2447 L.i = awk_getline(X.rsm, R.v);
2448 if (L.i > 0) {
2449 if (!op1) {
2450 incvar(intvar[FNR]);
2451 incvar(intvar[NR]);
2452 }
2453 }
2454 setvar_i(res, L.i);
2455 break;
2456
2457
2458 case XC( OC_FBLTIN ):
2459 switch (opn) {
2460
2461 case F_in:
2462 R.d = (int)L.d;
2463 break;
2464
2465 case F_rn:
2466 R.d = (double)rand() / (double)RAND_MAX;
2467 break;
2468#if ENABLE_FEATURE_AWK_LIBM
2469 case F_co:
2470 R.d = cos(L.d);
2471 break;
2472
2473 case F_ex:
2474 R.d = exp(L.d);
2475 break;
2476
2477 case F_lg:
2478 R.d = log(L.d);
2479 break;
2480
2481 case F_si:
2482 R.d = sin(L.d);
2483 break;
2484
2485 case F_sq:
2486 R.d = sqrt(L.d);
2487 break;
2488#else
2489 case F_co:
2490 case F_ex:
2491 case F_lg:
2492 case F_si:
2493 case F_sq:
2494 syntax_error(EMSG_NO_MATH);
2495 break;
2496#endif
2497 case F_sr:
2498 R.d = (double)seed;
2499 seed = op1 ? (unsigned)L.d : (unsigned)time(NULL);
2500 srand(seed);
2501 break;
2502
2503 case F_ti:
2504 R.d = time(NULL);
2505 break;
2506
2507 case F_le:
2508 if (!op1)
2509 L.s = getvar_s(intvar[F0]);
2510 R.d = strlen(L.s);
2511 break;
2512
2513 case F_sy:
2514 fflush(NULL);
2515 R.d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2516 ? (system(L.s) >> 8) : 0;
2517 break;
2518
2519 case F_ff:
2520 if (!op1)
2521 fflush(stdout);
2522 else {
2523 if (L.s && *L.s) {
2524 X.rsm = newfile(L.s);
2525 fflush(X.rsm->F);
2526 } else {
2527 fflush(NULL);
2528 }
2529 }
2530 break;
2531
2532 case F_cl:
2533 X.rsm = (rstream *)hash_search(fdhash, L.s);
2534 if (X.rsm) {
2535 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
2536 free(X.rsm->buffer);
2537 hash_remove(fdhash, L.s);
2538 }
2539 if (R.i != 0)
2540 setvar_i(intvar[ERRNO], errno);
2541 R.d = (double)R.i;
2542 break;
2543 }
2544 setvar_i(res, R.d);
2545 break;
2546
2547 case XC( OC_BUILTIN ):
2548 res = exec_builtin(op, res);
2549 break;
2550
2551 case XC( OC_SPRINTF ):
2552 setvar_p(res, awk_printf(op1));
2553 break;
2554
2555 case XC( OC_UNARY ):
2556 X.v = R.v;
2557 L.d = R.d = getvar_i(R.v);
2558 switch (opn) {
2559 case 'P':
2560 L.d = ++R.d;
2561 goto r_op_change;
2562 case 'p':
2563 R.d++;
2564 goto r_op_change;
2565 case 'M':
2566 L.d = --R.d;
2567 goto r_op_change;
2568 case 'm':
2569 R.d--;
2570 goto r_op_change;
2571 case '!':
2572 L.d = istrue(X.v) ? 0 : 1;
2573 break;
2574 case '-':
2575 L.d = -R.d;
2576 break;
2577 r_op_change:
2578 setvar_i(X.v, R.d);
2579 }
2580 setvar_i(res, L.d);
2581 break;
2582
2583 case XC( OC_FIELD ):
2584 R.i = (int)getvar_i(R.v);
2585 if (R.i == 0) {
2586 res = intvar[F0];
2587 } else {
2588 split_f0();
2589 if (R.i > nfields)
2590 fsrealloc(R.i);
2591 res = &Fields[R.i - 1];
2592 }
2593 break;
2594
2595
2596 case XC( OC_CONCAT ):
2597 case XC( OC_COMMA ):
2598 opn = strlen(L.s) + strlen(R.s) + 2;
2599 X.s = xmalloc(opn);
2600 strcpy(X.s, L.s);
2601 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2602 L.s = getvar_s(intvar[SUBSEP]);
2603 X.s = xrealloc(X.s, opn + strlen(L.s));
2604 strcat(X.s, L.s);
2605 }
2606 strcat(X.s, R.s);
2607 setvar_p(res, X.s);
2608 break;
2609
2610 case XC( OC_LAND ):
2611 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2612 break;
2613
2614 case XC( OC_LOR ):
2615 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2616 break;
2617
2618 case XC( OC_BINARY ):
2619 case XC( OC_REPLACE ):
2620 R.d = getvar_i(R.v);
2621 switch (opn) {
2622 case '+':
2623 L.d += R.d;
2624 break;
2625 case '-':
2626 L.d -= R.d;
2627 break;
2628 case '*':
2629 L.d *= R.d;
2630 break;
2631 case '/':
2632 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2633 L.d /= R.d;
2634 break;
2635 case '&':
2636#if ENABLE_FEATURE_AWK_LIBM
2637 L.d = pow(L.d, R.d);
2638#else
2639 syntax_error(EMSG_NO_MATH);
2640#endif
2641 break;
2642 case '%':
2643 if (R.d == 0) syntax_error(EMSG_DIV_BY_ZERO);
2644 L.d -= (int)(L.d / R.d) * R.d;
2645 break;
2646 }
2647 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2648 break;
2649
2650 case XC( OC_COMPARE ):
2651 if (is_numeric(L.v) && is_numeric(R.v)) {
2652 L.d = getvar_i(L.v) - getvar_i(R.v);
2653 } else {
2654 L.s = getvar_s(L.v);
2655 R.s = getvar_s(R.v);
2656 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2657 }
2658 switch (opn & 0xfe) {
2659 case 0:
2660 R.i = (L.d > 0);
2661 break;
2662 case 2:
2663 R.i = (L.d >= 0);
2664 break;
2665 case 4:
2666 R.i = (L.d == 0);
2667 break;
2668 }
2669 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2670 break;
2671
2672 default:
2673 syntax_error(EMSG_POSSIBLE_ERROR);
2674 }
2675 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2676 op = op->a.n;
2677 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2678 break;
2679 if (nextrec)
2680 break;
2681 }
2682 nvfree(v1);
2683 return res;
2684#undef fnargs
2685#undef seed
2686#undef sreg
2687}
2688
2689
2690
2691
2692static int awk_exit(int r)
2693{
2694 var tv;
2695 unsigned i;
2696 hash_item *hi;
2697
2698 zero_out_var(&tv);
2699
2700 if (!exiting) {
2701 exiting = TRUE;
2702 nextrec = FALSE;
2703 evaluate(endseq.first, &tv);
2704 }
2705
2706
2707 for (i = 0; i < fdhash->csize; i++) {
2708 hi = fdhash->items[i];
2709 while (hi) {
2710 if (hi->data.rs.F && hi->data.rs.is_pipe)
2711 pclose(hi->data.rs.F);
2712 hi = hi->next;
2713 }
2714 }
2715
2716 exit(r);
2717}
2718
2719
2720
2721static int is_assignment(const char *expr)
2722{
2723 char *exprc, *s, *s0, *s1;
2724
2725 exprc = xstrdup(expr);
2726 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2727 free(exprc);
2728 return FALSE;
2729 }
2730
2731 *(s++) = '\0';
2732 s0 = s1 = s;
2733 while (*s)
2734 *(s1++) = nextchar(&s);
2735
2736 *s1 = '\0';
2737 setvar_u(newvar(exprc), s0);
2738 free(exprc);
2739 return TRUE;
2740}
2741
2742
2743static rstream *next_input_file(void)
2744{
2745#define rsm (G.next_input_file__rsm)
2746#define files_happen (G.next_input_file__files_happen)
2747
2748 FILE *F = NULL;
2749 const char *fname, *ind;
2750
2751 if (rsm.F) fclose(rsm.F);
2752 rsm.F = NULL;
2753 rsm.pos = rsm.adv = 0;
2754
2755 do {
2756 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2757 if (files_happen)
2758 return NULL;
2759 fname = "-";
2760 F = stdin;
2761 } else {
2762 ind = getvar_s(incvar(intvar[ARGIND]));
2763 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2764 if (fname && *fname && !is_assignment(fname))
2765 F = xfopen_stdin(fname);
2766 }
2767 } while (!F);
2768
2769 files_happen = TRUE;
2770 setvar_s(intvar[FILENAME], fname);
2771 rsm.F = F;
2772 return &rsm;
2773#undef rsm
2774#undef files_happen
2775}
2776
2777int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
2778int awk_main(int argc, char **argv)
2779{
2780 unsigned opt;
2781 char *opt_F, *opt_W;
2782 llist_t *list_v = NULL;
2783 llist_t *list_f = NULL;
2784 int i, j;
2785 var *v;
2786 var tv;
2787 char **envp;
2788 char *vnames = (char *)vNames;
2789 char *vvalues = (char *)vValues;
2790
2791 INIT_G();
2792
2793
2794
2795 if (ENABLE_LOCALE_SUPPORT)
2796 setlocale(LC_NUMERIC, "C");
2797
2798 zero_out_var(&tv);
2799
2800
2801 g_buf = xmalloc(MAXVARFMT + 1);
2802
2803 vhash = hash_init();
2804 ahash = hash_init();
2805 fdhash = hash_init();
2806 fnhash = hash_init();
2807
2808
2809 for (i = 0; *vnames; i++) {
2810 intvar[i] = v = newvar(nextword(&vnames));
2811 if (*vvalues != '\377')
2812 setvar_s(v, nextword(&vvalues));
2813 else
2814 setvar_i(v, 0);
2815
2816 if (*vnames == '*') {
2817 v->type |= VF_SPECIAL;
2818 vnames++;
2819 }
2820 }
2821
2822 handle_special(intvar[FS]);
2823 handle_special(intvar[RS]);
2824
2825 newfile("/dev/stdin")->F = stdin;
2826 newfile("/dev/stdout")->F = stdout;
2827 newfile("/dev/stderr")->F = stderr;
2828
2829
2830 if (environ) for (envp = environ; *envp; envp++) {
2831
2832 char *s = *envp;
2833 char *s1 = strchr(s, '=');
2834 if (s1) {
2835 *s1 = '\0';
2836
2837
2838 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
2839 *s1 = '=';
2840 }
2841 }
2842 opt_complementary = "v::f::";
2843 opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
2844 argv += optind;
2845 argc -= optind;
2846 if (opt & 0x1)
2847 setvar_s(intvar[FS], opt_F);
2848 while (list_v) {
2849 if (!is_assignment(llist_pop(&list_v)))
2850 bb_show_usage();
2851 }
2852 if (list_f) {
2853 do {
2854 char *s = NULL;
2855 FILE *from_file;
2856
2857 g_progname = llist_pop(&list_f);
2858 from_file = xfopen_stdin(g_progname);
2859
2860 for (i = j = 1; j > 0; i += j) {
2861 s = xrealloc(s, i + 4096);
2862 j = fread(s + i, 1, 4094, from_file);
2863 }
2864 s[i] = '\0';
2865 fclose(from_file);
2866 parse_program(s + 1);
2867 free(s);
2868 } while (list_f);
2869 } else {
2870 if (!argc)
2871 bb_show_usage();
2872 g_progname = "cmd. line";
2873 parse_program(*argv++);
2874 argc--;
2875 }
2876 if (opt & 0x8)
2877 bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
2878
2879
2880 setvar_i(intvar[ARGC], argc + 1);
2881 setari_u(intvar[ARGV], 0, "awk");
2882 i = 0;
2883 while (*argv)
2884 setari_u(intvar[ARGV], ++i, *argv++);
2885
2886 evaluate(beginseq.first, &tv);
2887 if (!mainseq.first && !endseq.first)
2888 awk_exit(EXIT_SUCCESS);
2889
2890
2891 if (!iF) iF = next_input_file();
2892
2893
2894 while (iF) {
2895 nextfile = FALSE;
2896 setvar_i(intvar[FNR], 0);
2897
2898 while ((i = awk_getline(iF, intvar[F0])) > 0) {
2899 nextrec = FALSE;
2900 incvar(intvar[NR]);
2901 incvar(intvar[FNR]);
2902 evaluate(mainseq.first, &tv);
2903
2904 if (nextfile)
2905 break;
2906 }
2907
2908 if (i < 0)
2909 syntax_error(strerror(errno));
2910
2911 iF = next_input_file();
2912 }
2913
2914 awk_exit(EXIT_SUCCESS);
2915
2916}
2917