linux/scripts/unifdef.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2002 - 2005 Tony Finch <dot@dotat.at>.  All rights reserved.
   3 *
   4 * This code is derived from software contributed to Berkeley by Dave Yost.
   5 * It was rewritten to support ANSI C by Tony Finch. The original version of
   6 * unifdef carried the following copyright notice. None of its code remains
   7 * in this version (though some of the names remain).
   8 *
   9 * Copyright (c) 1985, 1993
  10 *      The Regents of the University of California.  All rights reserved.
  11 *
  12 * Redistribution and use in source and binary forms, with or without
  13 * modification, are permitted provided that the following conditions
  14 * are met:
  15 * 1. Redistributions of source code must retain the above copyright
  16 *    notice, this list of conditions and the following disclaimer.
  17 * 2. Redistributions in binary form must reproduce the above copyright
  18 *    notice, this list of conditions and the following disclaimer in the
  19 *    documentation and/or other materials provided with the distribution.
  20 *
  21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31 * SUCH DAMAGE.
  32 */
  33
  34#include <sys/cdefs.h>
  35
  36#ifndef lint
  37#if 0
  38static const char copyright[] =
  39"@(#) Copyright (c) 1985, 1993\n\
  40        The Regents of the University of California.  All rights reserved.\n";
  41#endif
  42#ifdef __IDSTRING
  43__IDSTRING(Berkeley, "@(#)unifdef.c     8.1 (Berkeley) 6/6/93");
  44__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $");
  45__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.171 2005/03/08 12:38:48 fanf2 Exp $");
  46#endif
  47#endif /* not lint */
  48#ifdef __FBSDID
  49__FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05/21 09:55:09 ru Exp $");
  50#endif
  51
  52/*
  53 * unifdef - remove ifdef'ed lines
  54 *
  55 *  Wishlist:
  56 *      provide an option which will append the name of the
  57 *        appropriate symbol after #else's and #endif's
  58 *      provide an option which will check symbols after
  59 *        #else's and #endif's to see that they match their
  60 *        corresponding #ifdef or #ifndef
  61 *
  62 *   The first two items above require better buffer handling, which would
  63 *     also make it possible to handle all "dodgy" directives correctly.
  64 */
  65
  66#include <ctype.h>
  67#include <err.h>
  68#include <stdarg.h>
  69#include <stdbool.h>
  70#include <stdio.h>
  71#include <stdlib.h>
  72#include <string.h>
  73#include <unistd.h>
  74
  75size_t strlcpy(char *dst, const char *src, size_t siz);
  76
  77/* types of input lines: */
  78typedef enum {
  79        LT_TRUEI,               /* a true #if with ignore flag */
  80        LT_FALSEI,              /* a false #if with ignore flag */
  81        LT_IF,                  /* an unknown #if */
  82        LT_TRUE,                /* a true #if */
  83        LT_FALSE,               /* a false #if */
  84        LT_ELIF,                /* an unknown #elif */
  85        LT_ELTRUE,              /* a true #elif */
  86        LT_ELFALSE,             /* a false #elif */
  87        LT_ELSE,                /* #else */
  88        LT_ENDIF,               /* #endif */
  89        LT_DODGY,               /* flag: directive is not on one line */
  90        LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
  91        LT_PLAIN,               /* ordinary line */
  92        LT_EOF,                 /* end of file */
  93        LT_COUNT
  94} Linetype;
  95
  96static char const * const linetype_name[] = {
  97        "TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
  98        "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
  99        "DODGY TRUEI", "DODGY FALSEI",
 100        "DODGY IF", "DODGY TRUE", "DODGY FALSE",
 101        "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
 102        "DODGY ELSE", "DODGY ENDIF",
 103        "PLAIN", "EOF"
 104};
 105
 106/* state of #if processing */
 107typedef enum {
 108        IS_OUTSIDE,
 109        IS_FALSE_PREFIX,        /* false #if followed by false #elifs */
 110        IS_TRUE_PREFIX,         /* first non-false #(el)if is true */
 111        IS_PASS_MIDDLE,         /* first non-false #(el)if is unknown */
 112        IS_FALSE_MIDDLE,        /* a false #elif after a pass state */
 113        IS_TRUE_MIDDLE,         /* a true #elif after a pass state */
 114        IS_PASS_ELSE,           /* an else after a pass state */
 115        IS_FALSE_ELSE,          /* an else after a true state */
 116        IS_TRUE_ELSE,           /* an else after only false states */
 117        IS_FALSE_TRAILER,       /* #elifs after a true are false */
 118        IS_COUNT
 119} Ifstate;
 120
 121static char const * const ifstate_name[] = {
 122        "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
 123        "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
 124        "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
 125        "FALSE_TRAILER"
 126};
 127
 128/* state of comment parser */
 129typedef enum {
 130        NO_COMMENT = false,     /* outside a comment */
 131        C_COMMENT,              /* in a comment like this one */
 132        CXX_COMMENT,            /* between // and end of line */
 133        STARTING_COMMENT,       /* just after slash-backslash-newline */
 134        FINISHING_COMMENT,      /* star-backslash-newline in a C comment */
 135        CHAR_LITERAL,           /* inside '' */
 136        STRING_LITERAL          /* inside "" */
 137} Comment_state;
 138
 139static char const * const comment_name[] = {
 140        "NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
 141};
 142
 143/* state of preprocessor line parser */
 144typedef enum {
 145        LS_START,               /* only space and comments on this line */
 146        LS_HASH,                /* only space, comments, and a hash */
 147        LS_DIRTY                /* this line can't be a preprocessor line */
 148} Line_state;
 149
 150static char const * const linestate_name[] = {
 151        "START", "HASH", "DIRTY"
 152};
 153
 154/*
 155 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 156 */
 157#define MAXDEPTH        64                      /* maximum #if nesting */
 158#define MAXLINE         4096                    /* maximum length of line */
 159#define MAXSYMS         4096                    /* maximum number of symbols */
 160
 161/*
 162 * Sometimes when editing a keyword the replacement text is longer, so
 163 * we leave some space at the end of the tline buffer to accommodate this.
 164 */
 165#define EDITSLOP        10
 166
 167/*
 168 * Globals.
 169 */
 170
 171static bool             complement;             /* -c: do the complement */
 172static bool             debugging;              /* -d: debugging reports */
 173static bool             iocccok;                /* -e: fewer IOCCC errors */
 174static bool             killconsts;             /* -k: eval constant #ifs */
 175static bool             lnblank;                /* -l: blank deleted lines */
 176static bool             lnnum;                  /* -n: add #line directives */
 177static bool             symlist;                /* -s: output symbol list */
 178static bool             text;                   /* -t: this is a text file */
 179
 180static const char      *symname[MAXSYMS];       /* symbol name */
 181static const char      *value[MAXSYMS];         /* -Dsym=value */
 182static bool             ignore[MAXSYMS];        /* -iDsym or -iUsym */
 183static int              nsyms;                  /* number of symbols */
 184
 185static FILE            *input;                  /* input file pointer */
 186static const char      *filename;               /* input file name */
 187static int              linenum;                /* current line number */
 188
 189static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
 190static char            *keyword;                /* used for editing #elif's */
 191
 192static Comment_state    incomment;              /* comment parser state */
 193static Line_state       linestate;              /* #if line parser state */
 194static Ifstate          ifstate[MAXDEPTH];      /* #if processor state */
 195static bool             ignoring[MAXDEPTH];     /* ignore comments state */
 196static int              stifline[MAXDEPTH];     /* start of current #if */
 197static int              depth;                  /* current #if nesting */
 198static int              delcount;               /* count of deleted lines */
 199static bool             keepthis;               /* don't delete constant #if */
 200
 201static int              exitstat;               /* program exit status */
 202
 203static void             addsym(bool, bool, char *);
 204static void             debug(const char *, ...);
 205static void             done(void);
 206static void             error(const char *);
 207static int              findsym(const char *);
 208static void             flushline(bool);
 209static Linetype         get_line(void);
 210static Linetype         ifeval(const char **);
 211static void             ignoreoff(void);
 212static void             ignoreon(void);
 213static void             keywordedit(const char *);
 214static void             nest(void);
 215static void             process(void);
 216static const char      *skipcomment(const char *);
 217static const char      *skipsym(const char *);
 218static void             state(Ifstate);
 219static int              strlcmp(const char *, const char *, size_t);
 220static void             unnest(void);
 221static void             usage(void);
 222
 223#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_')
 224
 225/*
 226 * The main program.
 227 */
 228int
 229main(int argc, char *argv[])
 230{
 231        int opt;
 232
 233        while ((opt = getopt(argc, argv, "i:D:U:I:cdeklnst")) != -1)
 234                switch (opt) {
 235                case 'i': /* treat stuff controlled by these symbols as text */
 236                        /*
 237                         * For strict backwards-compatibility the U or D
 238                         * should be immediately after the -i but it doesn't
 239                         * matter much if we relax that requirement.
 240                         */
 241                        opt = *optarg++;
 242                        if (opt == 'D')
 243                                addsym(true, true, optarg);
 244                        else if (opt == 'U')
 245                                addsym(true, false, optarg);
 246                        else
 247                                usage();
 248                        break;
 249                case 'D': /* define a symbol */
 250                        addsym(false, true, optarg);
 251                        break;
 252                case 'U': /* undef a symbol */
 253                        addsym(false, false, optarg);
 254                        break;
 255                case 'I':
 256                        /* no-op for compatibility with cpp */
 257                        break;
 258                case 'c': /* treat -D as -U and vice versa */
 259                        complement = true;
 260                        break;
 261                case 'd':
 262                        debugging = true;
 263                        break;
 264                case 'e': /* fewer errors from dodgy lines */
 265                        iocccok = true;
 266                        break;
 267                case 'k': /* process constant #ifs */
 268                        killconsts = true;
 269                        break;
 270                case 'l': /* blank deleted lines instead of omitting them */
 271                        lnblank = true;
 272                        break;
 273                case 'n': /* add #line directive after deleted lines */
 274                        lnnum = true;
 275                        break;
 276                case 's': /* only output list of symbols that control #ifs */
 277                        symlist = true;
 278                        break;
 279                case 't': /* don't parse C comments */
 280                        text = true;
 281                        break;
 282                default:
 283                        usage();
 284                }
 285        argc -= optind;
 286        argv += optind;
 287        if (argc > 1) {
 288                errx(2, "can only do one file");
 289        } else if (argc == 1 && strcmp(*argv, "-") != 0) {
 290                filename = *argv;
 291                input = fopen(filename, "r");
 292                if (input == NULL)
 293                        err(2, "can't open %s", filename);
 294        } else {
 295                filename = "[stdin]";
 296                input = stdin;
 297        }
 298        process();
 299        abort(); /* bug */
 300}
 301
 302static void
 303usage(void)
 304{
 305        fprintf(stderr, "usage: unifdef [-cdeklnst] [-Ipath]"
 306            " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
 307        exit(2);
 308}
 309
 310/*
 311 * A state transition function alters the global #if processing state
 312 * in a particular way. The table below is indexed by the current
 313 * processing state and the type of the current line.
 314 *
 315 * Nesting is handled by keeping a stack of states; some transition
 316 * functions increase or decrease the depth. They also maintain the
 317 * ignore state on a stack. In some complicated cases they have to
 318 * alter the preprocessor directive, as follows.
 319 *
 320 * When we have processed a group that starts off with a known-false
 321 * #if/#elif sequence (which has therefore been deleted) followed by a
 322 * #elif that we don't understand and therefore must keep, we edit the
 323 * latter into a #if to keep the nesting correct.
 324 *
 325 * When we find a true #elif in a group, the following block will
 326 * always be kept and the rest of the sequence after the next #elif or
 327 * #else will be discarded. We edit the #elif into a #else and the
 328 * following directive to #endif since this has the desired behaviour.
 329 *
 330 * "Dodgy" directives are split across multiple lines, the most common
 331 * example being a multi-line comment hanging off the right of the
 332 * directive. We can handle them correctly only if there is no change
 333 * from printing to dropping (or vice versa) caused by that directive.
 334 * If the directive is the first of a group we have a choice between
 335 * failing with an error, or passing it through unchanged instead of
 336 * evaluating it. The latter is not the default to avoid questions from
 337 * users about unifdef unexpectedly leaving behind preprocessor directives.
 338 */
 339typedef void state_fn(void);
 340
 341/* report an error */
 342static void Eelif (void) { error("Inappropriate #elif"); }
 343static void Eelse (void) { error("Inappropriate #else"); }
 344static void Eendif(void) { error("Inappropriate #endif"); }
 345static void Eeof  (void) { error("Premature EOF"); }
 346static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
 347/* plain line handling */
 348static void print (void) { flushline(true); }
 349static void drop  (void) { flushline(false); }
 350/* output lacks group's start line */
 351static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
 352static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
 353static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
 354/* print/pass this block */
 355static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
 356static void Pelse (void) { print();              state(IS_PASS_ELSE); }
 357static void Pendif(void) { print(); unnest(); }
 358/* discard this block */
 359static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
 360static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
 361static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
 362static void Dendif(void) { drop();  unnest(); }
 363/* first line of group */
 364static void Fdrop (void) { nest();  Dfalse(); }
 365static void Fpass (void) { nest();  Pelif(); }
 366static void Ftrue (void) { nest();  Strue(); }
 367static void Ffalse(void) { nest();  Sfalse(); }
 368/* variable pedantry for obfuscated lines */
 369static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
 370static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
 371static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
 372/* ignore comments in this block */
 373static void Idrop (void) { Fdrop();  ignoreon(); }
 374static void Itrue (void) { Ftrue();  ignoreon(); }
 375static void Ifalse(void) { Ffalse(); ignoreon(); }
 376/* edit this line */
 377static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
 378static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); }
 379static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); }
 380static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); }
 381
 382static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
 383/* IS_OUTSIDE */
 384{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
 385  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
 386  print, done },
 387/* IS_FALSE_PREFIX */
 388{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
 389  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
 390  drop,  Eeof },
 391/* IS_TRUE_PREFIX */
 392{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
 393  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
 394  print, Eeof },
 395/* IS_PASS_MIDDLE */
 396{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
 397  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
 398  print, Eeof },
 399/* IS_FALSE_MIDDLE */
 400{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
 401  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
 402  drop,  Eeof },
 403/* IS_TRUE_MIDDLE */
 404{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
 405  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
 406  print, Eeof },
 407/* IS_PASS_ELSE */
 408{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
 409  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
 410  print, Eeof },
 411/* IS_FALSE_ELSE */
 412{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
 413  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
 414  drop,  Eeof },
 415/* IS_TRUE_ELSE */
 416{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
 417  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
 418  print, Eeof },
 419/* IS_FALSE_TRAILER */
 420{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
 421  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
 422  drop,  Eeof }
 423/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
 424  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
 425  PLAIN  EOF */
 426};
 427
 428/*
 429 * State machine utility functions
 430 */
 431static void
 432done(void)
 433{
 434        if (incomment)
 435                error("EOF in comment");
 436        exit(exitstat);
 437}
 438static void
 439ignoreoff(void)
 440{
 441        if (depth == 0)
 442                abort(); /* bug */
 443        ignoring[depth] = ignoring[depth-1];
 444}
 445static void
 446ignoreon(void)
 447{
 448        ignoring[depth] = true;
 449}
 450static void
 451keywordedit(const char *replacement)
 452{
 453        size_t size = tline + sizeof(tline) - keyword;
 454        char *dst = keyword;
 455        const char *src = replacement;
 456        if (size != 0) {
 457                while ((--size != 0) && (*src != '\0'))
 458                        *dst++ = *src++;
 459                *dst = '\0';
 460        }
 461        print();
 462}
 463static void
 464nest(void)
 465{
 466        depth += 1;
 467        if (depth >= MAXDEPTH)
 468                error("Too many levels of nesting");
 469        stifline[depth] = linenum;
 470}
 471static void
 472unnest(void)
 473{
 474        if (depth == 0)
 475                abort(); /* bug */
 476        depth -= 1;
 477}
 478static void
 479state(Ifstate is)
 480{
 481        ifstate[depth] = is;
 482}
 483
 484/*
 485 * Write a line to the output or not, according to command line options.
 486 */
 487static void
 488flushline(bool keep)
 489{
 490        if (symlist)
 491                return;
 492        if (keep ^ complement) {
 493                if (lnnum && delcount > 0)
 494                        printf("#line %d\n", linenum);
 495                fputs(tline, stdout);
 496                delcount = 0;
 497        } else {
 498                if (lnblank)
 499                        putc('\n', stdout);
 500                exitstat = 1;
 501                delcount += 1;
 502        }
 503}
 504
 505/*
 506 * The driver for the state machine.
 507 */
 508static void
 509process(void)
 510{
 511        Linetype lineval;
 512
 513        for (;;) {
 514                linenum++;
 515                lineval = get_line();
 516                trans_table[ifstate[depth]][lineval]();
 517                debug("process %s -> %s depth %d",
 518                    linetype_name[lineval],
 519                    ifstate_name[ifstate[depth]], depth);
 520        }
 521}
 522
 523/*
 524 * Parse a line and determine its type. We keep the preprocessor line
 525 * parser state between calls in the global variable linestate, with
 526 * help from skipcomment().
 527 */
 528static Linetype
 529get_line(void)
 530{
 531        const char *cp;
 532        int cursym;
 533        int kwlen;
 534        Linetype retval;
 535        Comment_state wascomment;
 536
 537        if (fgets(tline, MAXLINE, input) == NULL)
 538                return (LT_EOF);
 539        retval = LT_PLAIN;
 540        wascomment = incomment;
 541        cp = skipcomment(tline);
 542        if (linestate == LS_START) {
 543                if (*cp == '#') {
 544                        linestate = LS_HASH;
 545                        cp = skipcomment(cp + 1);
 546                } else if (*cp != '\0')
 547                        linestate = LS_DIRTY;
 548        }
 549        if (!incomment && linestate == LS_HASH) {
 550                keyword = tline + (cp - tline);
 551                cp = skipsym(cp);
 552                kwlen = cp - keyword;
 553                /* no way can we deal with a continuation inside a keyword */
 554                if (strncmp(cp, "\\\n", 2) == 0)
 555                        Eioccc();
 556                if (strlcmp("ifdef", keyword, kwlen) == 0 ||
 557                    strlcmp("ifndef", keyword, kwlen) == 0) {
 558                        cp = skipcomment(cp);
 559                        if ((cursym = findsym(cp)) < 0)
 560                                retval = LT_IF;
 561                        else {
 562                                retval = (keyword[2] == 'n')
 563                                    ? LT_FALSE : LT_TRUE;
 564                                if (value[cursym] == NULL)
 565                                        retval = (retval == LT_TRUE)
 566                                            ? LT_FALSE : LT_TRUE;
 567                                if (ignore[cursym])
 568                                        retval = (retval == LT_TRUE)
 569                                            ? LT_TRUEI : LT_FALSEI;
 570                        }
 571                        cp = skipsym(cp);
 572                } else if (strlcmp("if", keyword, kwlen) == 0)
 573                        retval = ifeval(&cp);
 574                else if (strlcmp("elif", keyword, kwlen) == 0)
 575                        retval = ifeval(&cp) - LT_IF + LT_ELIF;
 576                else if (strlcmp("else", keyword, kwlen) == 0)
 577                        retval = LT_ELSE;
 578                else if (strlcmp("endif", keyword, kwlen) == 0)
 579                        retval = LT_ENDIF;
 580                else {
 581                        linestate = LS_DIRTY;
 582                        retval = LT_PLAIN;
 583                }
 584                cp = skipcomment(cp);
 585                if (*cp != '\0') {
 586                        linestate = LS_DIRTY;
 587                        if (retval == LT_TRUE || retval == LT_FALSE ||
 588                            retval == LT_TRUEI || retval == LT_FALSEI)
 589                                retval = LT_IF;
 590                        if (retval == LT_ELTRUE || retval == LT_ELFALSE)
 591                                retval = LT_ELIF;
 592                }
 593                if (retval != LT_PLAIN && (wascomment || incomment)) {
 594                        retval += LT_DODGY;
 595                        if (incomment)
 596                                linestate = LS_DIRTY;
 597                }
 598                /* skipcomment should have changed the state */
 599                if (linestate == LS_HASH)
 600                        abort(); /* bug */
 601        }
 602        if (linestate == LS_DIRTY) {
 603                while (*cp != '\0')
 604                        cp = skipcomment(cp + 1);
 605        }
 606        debug("parser %s comment %s line",
 607            comment_name[incomment], linestate_name[linestate]);
 608        return (retval);
 609}
 610
 611/*
 612 * These are the binary operators that are supported by the expression
 613 * evaluator. Note that if support for division is added then we also
 614 * need short-circuiting booleans because of divide-by-zero.
 615 */
 616static int op_lt(int a, int b) { return (a < b); }
 617static int op_gt(int a, int b) { return (a > b); }
 618static int op_le(int a, int b) { return (a <= b); }
 619static int op_ge(int a, int b) { return (a >= b); }
 620static int op_eq(int a, int b) { return (a == b); }
 621static int op_ne(int a, int b) { return (a != b); }
 622static int op_or(int a, int b) { return (a || b); }
 623static int op_and(int a, int b) { return (a && b); }
 624
 625/*
 626 * An evaluation function takes three arguments, as follows: (1) a pointer to
 627 * an element of the precedence table which lists the operators at the current
 628 * level of precedence; (2) a pointer to an integer which will receive the
 629 * value of the expression; and (3) a pointer to a char* that points to the
 630 * expression to be evaluated and that is updated to the end of the expression
 631 * when evaluation is complete. The function returns LT_FALSE if the value of
 632 * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the
 633 * expression could not be evaluated.
 634 */
 635struct ops;
 636
 637typedef Linetype eval_fn(const struct ops *, int *, const char **);
 638
 639static eval_fn eval_table, eval_unary;
 640
 641/*
 642 * The precedence table. Expressions involving binary operators are evaluated
 643 * in a table-driven way by eval_table. When it evaluates a subexpression it
 644 * calls the inner function with its first argument pointing to the next
 645 * element of the table. Innermost expressions have special non-table-driven
 646 * handling.
 647 */
 648static const struct ops {
 649        eval_fn *inner;
 650        struct op {
 651                const char *str;
 652                int (*fn)(int, int);
 653        } op[5];
 654} eval_ops[] = {
 655        { eval_table, { { "||", op_or } } },
 656        { eval_table, { { "&&", op_and } } },
 657        { eval_table, { { "==", op_eq },
 658                        { "!=", op_ne } } },
 659        { eval_unary, { { "<=", op_le },
 660                        { ">=", op_ge },
 661                        { "<", op_lt },
 662                        { ">", op_gt } } }
 663};
 664
 665/*
 666 * Function for evaluating the innermost parts of expressions,
 667 * viz. !expr (expr) defined(symbol) symbol number
 668 * We reset the keepthis flag when we find a non-constant subexpression.
 669 */
 670static Linetype
 671eval_unary(const struct ops *ops, int *valp, const char **cpp)
 672{
 673        const char *cp;
 674        char *ep;
 675        int sym;
 676
 677        cp = skipcomment(*cpp);
 678        if (*cp == '!') {
 679                debug("eval%d !", ops - eval_ops);
 680                cp++;
 681                if (eval_unary(ops, valp, &cp) == LT_IF) {
 682                        *cpp = cp;
 683                        return (LT_IF);
 684                }
 685                *valp = !*valp;
 686        } else if (*cp == '(') {
 687                cp++;
 688                debug("eval%d (", ops - eval_ops);
 689                if (eval_table(eval_ops, valp, &cp) == LT_IF)
 690                        return (LT_IF);
 691                cp = skipcomment(cp);
 692                if (*cp++ != ')')
 693                        return (LT_IF);
 694        } else if (isdigit((unsigned char)*cp)) {
 695                debug("eval%d number", ops - eval_ops);
 696                *valp = strtol(cp, &ep, 0);
 697                cp = skipsym(cp);
 698        } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
 699                cp = skipcomment(cp+7);
 700                debug("eval%d defined", ops - eval_ops);
 701                if (*cp++ != '(')
 702                        return (LT_IF);
 703                cp = skipcomment(cp);
 704                sym = findsym(cp);
 705                cp = skipsym(cp);
 706                cp = skipcomment(cp);
 707                if (*cp++ != ')')
 708                        return (LT_IF);
 709                if (sym >= 0)
 710                        *valp = (value[sym] != NULL);
 711                else {
 712                        *cpp = cp;
 713                        return (LT_IF);
 714                }
 715                keepthis = false;
 716        } else if (!endsym(*cp)) {
 717                debug("eval%d symbol", ops - eval_ops);
 718                sym = findsym(cp);
 719                if (sym < 0)
 720                        return (LT_IF);
 721                if (value[sym] == NULL)
 722                        *valp = 0;
 723                else {
 724                        *valp = strtol(value[sym], &ep, 0);
 725                        if (*ep != '\0' || ep == value[sym])
 726                                return (LT_IF);
 727                }
 728                cp = skipsym(cp);
 729                keepthis = false;
 730        } else {
 731                debug("eval%d bad expr", ops - eval_ops);
 732                return (LT_IF);
 733        }
 734
 735        *cpp = cp;
 736        debug("eval%d = %d", ops - eval_ops, *valp);
 737        return (*valp ? LT_TRUE : LT_FALSE);
 738}
 739
 740/*
 741 * Table-driven evaluation of binary operators.
 742 */
 743static Linetype
 744eval_table(const struct ops *ops, int *valp, const char **cpp)
 745{
 746        const struct op *op;
 747        const char *cp;
 748        int val;
 749        Linetype lhs, rhs;
 750
 751        debug("eval%d", ops - eval_ops);
 752        cp = *cpp;
 753        lhs = ops->inner(ops+1, valp, &cp);
 754        for (;;) {
 755                cp = skipcomment(cp);
 756                for (op = ops->op; op->str != NULL; op++)
 757                        if (strncmp(cp, op->str, strlen(op->str)) == 0)
 758                                break;
 759                if (op->str == NULL)
 760                        break;
 761                cp += strlen(op->str);
 762                debug("eval%d %s", ops - eval_ops, op->str);
 763                rhs = ops->inner(ops+1, &val, &cp);
 764                if (op->fn == op_and && (lhs == LT_FALSE || rhs == LT_FALSE)) {
 765                        debug("eval%d: and always false", ops - eval_ops);
 766                        if (lhs == LT_IF)
 767                                *valp = val;
 768                        lhs = LT_FALSE;
 769                        continue;
 770                }
 771                if (op->fn == op_or && (lhs == LT_TRUE || rhs == LT_TRUE)) {
 772                        debug("eval%d: or always true", ops - eval_ops);
 773                        if (lhs == LT_IF)
 774                                *valp = val;
 775                        lhs = LT_TRUE;
 776                        continue;
 777                }
 778                if (rhs == LT_IF)
 779                        lhs = LT_IF;
 780                if (lhs != LT_IF)
 781                        *valp = op->fn(*valp, val);
 782        }
 783
 784        *cpp = cp;
 785        debug("eval%d = %d", ops - eval_ops, *valp);
 786        if (lhs != LT_IF)
 787                lhs = (*valp ? LT_TRUE : LT_FALSE);
 788        return lhs;
 789}
 790
 791/*
 792 * Evaluate the expression on a #if or #elif line. If we can work out
 793 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
 794 * return just a generic LT_IF.
 795 */
 796static Linetype
 797ifeval(const char **cpp)
 798{
 799        const char *cp = *cpp;
 800        int ret;
 801        int val;
 802
 803        debug("eval %s", *cpp);
 804        keepthis = killconsts ? false : true;
 805        ret = eval_table(eval_ops, &val, &cp);
 806        if (ret != LT_IF)
 807                *cpp = cp;
 808        debug("eval = %d", val);
 809        return (keepthis ? LT_IF : ret);
 810}
 811
 812/*
 813 * Skip over comments, strings, and character literals and stop at the
 814 * next character position that is not whitespace. Between calls we keep
 815 * the comment state in the global variable incomment, and we also adjust
 816 * the global variable linestate when we see a newline.
 817 * XXX: doesn't cope with the buffer splitting inside a state transition.
 818 */
 819static const char *
 820skipcomment(const char *cp)
 821{
 822        if (text || ignoring[depth]) {
 823                for (; isspace((unsigned char)*cp); cp++)
 824                        if (*cp == '\n')
 825                                linestate = LS_START;
 826                return (cp);
 827        }
 828        while (*cp != '\0')
 829                /* don't reset to LS_START after a line continuation */
 830                if (strncmp(cp, "\\\n", 2) == 0)
 831                        cp += 2;
 832                else switch (incomment) {
 833                case NO_COMMENT:
 834                        if (strncmp(cp, "/\\\n", 3) == 0) {
 835                                incomment = STARTING_COMMENT;
 836                                cp += 3;
 837                        } else if (strncmp(cp, "/*", 2) == 0) {
 838                                incomment = C_COMMENT;
 839                                cp += 2;
 840                        } else if (strncmp(cp, "//", 2) == 0) {
 841                                incomment = CXX_COMMENT;
 842                                cp += 2;
 843                        } else if (strncmp(cp, "\'", 1) == 0) {
 844                                incomment = CHAR_LITERAL;
 845                                linestate = LS_DIRTY;
 846                                cp += 1;
 847                        } else if (strncmp(cp, "\"", 1) == 0) {
 848                                incomment = STRING_LITERAL;
 849                                linestate = LS_DIRTY;
 850                                cp += 1;
 851                        } else if (strncmp(cp, "\n", 1) == 0) {
 852                                linestate = LS_START;
 853                                cp += 1;
 854                        } else if (strchr(" \t", *cp) != NULL) {
 855                                cp += 1;
 856                        } else
 857                                return (cp);
 858                        continue;
 859                case CXX_COMMENT:
 860                        if (strncmp(cp, "\n", 1) == 0) {
 861                                incomment = NO_COMMENT;
 862                                linestate = LS_START;
 863                        }
 864                        cp += 1;
 865                        continue;
 866                case CHAR_LITERAL:
 867                case STRING_LITERAL:
 868                        if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
 869                            (incomment == STRING_LITERAL && cp[0] == '\"')) {
 870                                incomment = NO_COMMENT;
 871                                cp += 1;
 872                        } else if (cp[0] == '\\') {
 873                                if (cp[1] == '\0')
 874                                        cp += 1;
 875                                else
 876                                        cp += 2;
 877                        } else if (strncmp(cp, "\n", 1) == 0) {
 878                                if (incomment == CHAR_LITERAL)
 879                                        error("unterminated char literal");
 880                                else
 881                                        error("unterminated string literal");
 882                        } else
 883                                cp += 1;
 884                        continue;
 885                case C_COMMENT:
 886                        if (strncmp(cp, "*\\\n", 3) == 0) {
 887                                incomment = FINISHING_COMMENT;
 888                                cp += 3;
 889                        } else if (strncmp(cp, "*/", 2) == 0) {
 890                                incomment = NO_COMMENT;
 891                                cp += 2;
 892                        } else
 893                                cp += 1;
 894                        continue;
 895                case STARTING_COMMENT:
 896                        if (*cp == '*') {
 897                                incomment = C_COMMENT;
 898                                cp += 1;
 899                        } else if (*cp == '/') {
 900                                incomment = CXX_COMMENT;
 901                                cp += 1;
 902                        } else {
 903                                incomment = NO_COMMENT;
 904                                linestate = LS_DIRTY;
 905                        }
 906                        continue;
 907                case FINISHING_COMMENT:
 908                        if (*cp == '/') {
 909                                incomment = NO_COMMENT;
 910                                cp += 1;
 911                        } else
 912                                incomment = C_COMMENT;
 913                        continue;
 914                default:
 915                        abort(); /* bug */
 916                }
 917        return (cp);
 918}
 919
 920/*
 921 * Skip over an identifier.
 922 */
 923static const char *
 924skipsym(const char *cp)
 925{
 926        while (!endsym(*cp))
 927                ++cp;
 928        return (cp);
 929}
 930
 931/*
 932 * Look for the symbol in the symbol table. If is is found, we return
 933 * the symbol table index, else we return -1.
 934 */
 935static int
 936findsym(const char *str)
 937{
 938        const char *cp;
 939        int symind;
 940
 941        cp = skipsym(str);
 942        if (cp == str)
 943                return (-1);
 944        if (symlist) {
 945                printf("%.*s\n", (int)(cp-str), str);
 946                /* we don't care about the value of the symbol */
 947                return (0);
 948        }
 949        for (symind = 0; symind < nsyms; ++symind) {
 950                if (strlcmp(symname[symind], str, cp-str) == 0) {
 951                        debug("findsym %s %s", symname[symind],
 952                            value[symind] ? value[symind] : "");
 953                        return (symind);
 954                }
 955        }
 956        return (-1);
 957}
 958
 959/*
 960 * Add a symbol to the symbol table.
 961 */
 962static void
 963addsym(bool ignorethis, bool definethis, char *sym)
 964{
 965        int symind;
 966        char *val;
 967
 968        symind = findsym(sym);
 969        if (symind < 0) {
 970                if (nsyms >= MAXSYMS)
 971                        errx(2, "too many symbols");
 972                symind = nsyms++;
 973        }
 974        symname[symind] = sym;
 975        ignore[symind] = ignorethis;
 976        val = sym + (skipsym(sym) - sym);
 977        if (definethis) {
 978                if (*val == '=') {
 979                        value[symind] = val+1;
 980                        *val = '\0';
 981                } else if (*val == '\0')
 982                        value[symind] = "";
 983                else
 984                        usage();
 985        } else {
 986                if (*val != '\0')
 987                        usage();
 988                value[symind] = NULL;
 989        }
 990}
 991
 992/*
 993 * Compare s with n characters of t.
 994 * The same as strncmp() except that it checks that s[n] == '\0'.
 995 */
 996static int
 997strlcmp(const char *s, const char *t, size_t n)
 998{
 999        while (n-- && *t != '\0')
1000                if (*s != *t)
1001                        return ((unsigned char)*s - (unsigned char)*t);
1002                else
1003                        ++s, ++t;
1004        return ((unsigned char)*s);
1005}
1006
1007/*
1008 * Diagnostics.
1009 */
1010static void
1011debug(const char *msg, ...)
1012{
1013        va_list ap;
1014
1015        if (debugging) {
1016                va_start(ap, msg);
1017                vwarnx(msg, ap);
1018                va_end(ap);
1019        }
1020}
1021
1022static void
1023error(const char *msg)
1024{
1025        if (depth == 0)
1026                warnx("%s: %d: %s", filename, linenum, msg);
1027        else
1028                warnx("%s: %d: %s (#if line %d depth %d)",
1029                    filename, linenum, msg, stifline[depth], depth);
1030        errx(2, "output may be truncated");
1031}
1032