LXR linux/scripts/genksyms/lex.l

/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <rth@tamu.edu>
   Based on original work by Bjorn Ekwall <bj0rn@blox.se>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


  24%{
  25
  26#include <limits.h>
  27#include <stdlib.h>
  28#include <string.h>
  29#include <ctype.h>
  30
  31#include "genksyms.h"
  32#include "parse.h"
  33
  34/* We've got a two-level lexer here.  We let flex do basic tokenization
  35   and then we categorize those basic tokens in the second stage.  */
  36#define YY_DECL         static int yylex1(void)
  37
  38%}
  39
  40IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*
  41
  42O_INT                   0[0-7]*
  43D_INT                   [1-9][0-9]*
  44X_INT                   0[Xx][0-9A-Fa-f]+
  45I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
  46INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
  47
  48FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
  49EXP                     [Ee][+-]?[0-9]+
  50F_SUF                   [FfLl]
  51REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
  52
  53STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
  54CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'
  55
  56MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
  57
  58/* Version 2 checksumming does proper tokenization; version 1 wasn't
  59   quite so pedantic.  */
  60%s V2_TOKENS
  61
  62/* We don't do multiple input files.  */
  63%option noyywrap
  64
  65%option noinput
  66
  67%%
  68
  69
  70 /* Keep track of our location in the original source files.  */
  71^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
  72^#.*\n                                  cur_line++;
  73\n                                      cur_line++;
  74
  75 /* Ignore all other whitespace.  */
  76[ \t\f\v\r]+                            ;
  77
  78
  79{STRING}                                return STRING;
  80{CHAR}                                  return CHAR;
  81{IDENT}                                 return IDENT;
  82
  83 /* The Pedant requires that the other C multi-character tokens be
  84    recognized as tokens.  We don't actually use them since we don't
  85    parse expressions, but we do want whitespace to be arranged
  86    around them properly.  */
  87<V2_TOKENS>{MC_TOKEN}                   return OTHER;
  88<V2_TOKENS>{INT}                        return INT;
  89<V2_TOKENS>{REAL}                       return REAL;
  90
  91"..."                                   return DOTS;
  92
  93 /* All other tokens are single characters.  */
  94.                                       return yytext[0];
  95
  96
  97%%
  98
  99/* Bring in the keyword recognizer.  */
 100
 101#include "keywords.c"
 102
 103
 104/* Macros to append to our phrase collection list.  */
 105
 106#define _APP(T,L)       do {                                               \
 107                          cur_node = next_node;                            \
 108                          next_node = xmalloc(sizeof(*next_node));         \
 109                          next_node->next = cur_node;                      \
 110                          cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
 111                          cur_node->tag = SYM_NORMAL;                      \
 112                        } while (0)
 113
 114#define APP             _APP(yytext, yyleng)
 115
 116
 117/* The second stage lexer.  Here we incorporate knowledge of the state
 118   of the parser to tailor the tokens that are returned.  */
 119
 120int
 121yylex(void)
 122{
 123  static enum {
 124    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
 125    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
 126    ST_TABLE_5, ST_TABLE_6
 127  } lexstate = ST_NOTSTARTED;
 128
 129  static int suppress_type_lookup, dont_want_brace_phrase;
 130  static struct string_list *next_node;
 131
 132  int token, count = 0;
 133  struct string_list *cur_node;
 134
 135  if (lexstate == ST_NOTSTARTED)
 136    {
 137      BEGIN(V2_TOKENS);
 138      next_node = xmalloc(sizeof(*next_node));
 139      next_node->next = NULL;
 140      lexstate = ST_NORMAL;
 141    }
 142
 143repeat:
 144  token = yylex1();
 145
 146  if (token == 0)
 147    return 0;
 148  else if (token == FILENAME)
 149    {
 150      char *file, *e;
 151
 152      /* Save the filename and line number for later error messages.  */
 153
 154      if (cur_filename)
 155        free(cur_filename);
 156
 157      file = strchr(yytext, '\"')+1;
 158      e = strchr(file, '\"');
 159      *e = '\0';
 160      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
 161      cur_line = atoi(yytext+2);
 162
 163      goto repeat;
 164    }
 165
 166  switch (lexstate)
 167    {
 168    case ST_NORMAL:
 169      switch (token)
 170        {
 171        case IDENT:
 172          APP;
 173          {
 174            const struct resword *r = is_reserved_word(yytext, yyleng);
 175            if (r)
 176              {
 177                switch (token = r->token)
 178                  {
 179                  case ATTRIBUTE_KEYW:
 180                    lexstate = ST_ATTRIBUTE;
 181                    count = 0;
 182                    goto repeat;
 183                  case ASM_KEYW:
 184                    lexstate = ST_ASM;
 185                    count = 0;
 186                    goto repeat;
 187
 188                  case STRUCT_KEYW:
 189                  case UNION_KEYW:
 190                    dont_want_brace_phrase = 3;
 191                  case ENUM_KEYW:
 192                    suppress_type_lookup = 2;
 193                    goto fini;
 194
 195                  case EXPORT_SYMBOL_KEYW:
 196                      goto fini;
 197                  }
 198              }
 199            if (!suppress_type_lookup)
 200              {
 201                struct symbol *sym = find_symbol(yytext, SYM_TYPEDEF);
 202                if (sym && sym->type == SYM_TYPEDEF)
 203                  token = TYPE;
 204              }
 205          }
 206          break;
 207
 208        case '[':
 209          APP;
 210          lexstate = ST_BRACKET;
 211          count = 1;
 212          goto repeat;
 213
 214        case '{':
 215          APP;
 216          if (dont_want_brace_phrase)
 217            break;
 218          lexstate = ST_BRACE;
 219          count = 1;
 220          goto repeat;
 221
 222        case '=': case ':':
 223          APP;
 224          lexstate = ST_EXPRESSION;
 225          break;
 226
 227        case DOTS:
 228        default:
 229          APP;
 230          break;
 231        }
 232      break;
 233
 234    case ST_ATTRIBUTE:
 235      APP;
 236      switch (token)
 237        {
 238        case '(':
 239          ++count;
 240          goto repeat;
 241        case ')':
 242          if (--count == 0)
 243            {
 244              lexstate = ST_NORMAL;
 245              token = ATTRIBUTE_PHRASE;
 246              break;
 247            }
 248          goto repeat;
 249        default:
 250          goto repeat;
 251        }
 252      break;
 253
 254    case ST_ASM:
 255      APP;
 256      switch (token)
 257        {
 258        case '(':
 259          ++count;
 260          goto repeat;
 261        case ')':
 262          if (--count == 0)
 263            {
 264              lexstate = ST_NORMAL;
 265              token = ASM_PHRASE;
 266              break;
 267            }
 268          goto repeat;
 269        default:
 270          goto repeat;
 271        }
 272      break;
 273
 274    case ST_BRACKET:
 275      APP;
 276      switch (token)
 277        {
 278        case '[':
 279          ++count;
 280          goto repeat;
 281        case ']':
 282          if (--count == 0)
 283            {
 284              lexstate = ST_NORMAL;
 285              token = BRACKET_PHRASE;
 286              break;
 287            }
 288          goto repeat;
 289        default:
 290          goto repeat;
 291        }
 292      break;
 293
 294    case ST_BRACE:
 295      APP;
 296      switch (token)
 297        {
 298        case '{':
 299          ++count;
 300          goto repeat;
 301        case '}':
 302          if (--count == 0)
 303            {
 304              lexstate = ST_NORMAL;
 305              token = BRACE_PHRASE;
 306              break;
 307            }
 308          goto repeat;
 309        default:
 310          goto repeat;
 311        }
 312      break;
 313
 314    case ST_EXPRESSION:
 315      switch (token)
 316        {
 317        case '(': case '[': case '{':
 318          ++count;
 319          APP;
 320          goto repeat;
 321        case ')': case ']': case '}':
 322          --count;
 323          APP;
 324          goto repeat;
 325        case ',': case ';':
 326          if (count == 0)
 327            {
 328              /* Put back the token we just read so's we can find it again
 329                 after registering the expression.  */
 330              unput(token);
 331
 332              lexstate = ST_NORMAL;
 333              token = EXPRESSION_PHRASE;
 334              break;
 335            }
 336          APP;
 337          goto repeat;
 338        default:
 339          APP;
 340          goto repeat;
 341        }
 342      break;
 343
 344    case ST_TABLE_1:
 345      goto repeat;
 346
 347    case ST_TABLE_2:
 348      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
 349        {
 350          token = EXPORT_SYMBOL_KEYW;
 351          lexstate = ST_TABLE_5;
 352          APP;
 353          break;
 354        }
 355      lexstate = ST_TABLE_6;
 356      /* FALLTHRU */
 357
 358    case ST_TABLE_6:
 359      switch (token)
 360        {
 361        case '{': case '[': case '(':
 362          ++count;
 363          break;
 364        case '}': case ']': case ')':
 365          --count;
 366          break;
 367        case ',':
 368          if (count == 0)
 369            lexstate = ST_TABLE_2;
 370          break;
 371        };
 372      goto repeat;
 373
 374    case ST_TABLE_3:
 375      goto repeat;
 376
 377    case ST_TABLE_4:
 378      if (token == ';')
 379        lexstate = ST_NORMAL;
 380      goto repeat;
 381
 382    case ST_TABLE_5:
 383      switch (token)
 384        {
 385        case ',':
 386          token = ';';
 387          lexstate = ST_TABLE_2;
 388          APP;
 389          break;
 390        default:
 391          APP;
 392          break;
 393        }
 394      break;
 395
 396    default:
 397      exit(1);
 398    }
 399fini:
 400
 401  if (suppress_type_lookup > 0)
 402    --suppress_type_lookup;
 403  if (dont_want_brace_phrase > 0)
 404    --dont_want_brace_phrase;
 405
 406  yylval = &next_node->next;
 407
 408  return token;
 409}
 410