/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  The
   flex-generated scanner is therefore named yylex1() and kept private;
   the yylex() the parser calls is hand-written further down.  */
#define YY_DECL		static int yylex1(void)

%}

IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A
    "# <number> "<file>"" line is handed to the second stage as
    FILENAME; any other line starting with '#' is only counted.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 */

/*
 * _APP(T, L): record the token text T (L characters plus its terminating
 * NUL) in the node list.
 *
 * next_node always points at an allocated but still unused node.  The macro
 * claims that node as cur_node, allocates a fresh spare whose ->next points
 * back at cur_node, and then fills cur_node in.  The list is therefore
 * linked from the newest node towards the oldest.
 *
 * The macro relies on the locals cur_node and next_node of yylex().
 */
#define _APP(T,L)	do {						   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string = memcpy(xmalloc((L) + 1), (T), \
						    (L) + 1);		   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
			  cur_node->in_source_file = in_source_file;       \
			} while (0)

/* Append the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Every token that is kept is appended to the node list with APP.
   Bracketed constructs (attribute, asm, typeof, static-assert argument
   lists, array brackets, brace bodies) and initializer / bit-field
   expressions are swallowed token by token and reported to the parser
   as one *_PHRASE token.

   Returns 0 once yylex1() reports end of input, otherwise the token
   code.  Before returning a token, yylval is set to the address of the
   ->next link of the spare node, i.e. the link that leads to the most
   recently appended node.  */

int
yylex(void)
{
  /* Scanner state; it persists between calls because a phrase can be
     entered in one call and finished in a later one (ST_EXPRESSION).  */
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT
  } lexstate = ST_NOTSTARTED;

  /* Both are countdowns that are decremented once per returned token
     (see the code after the "fini" label).  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;

  /* count is the nesting depth of the construct being collected.  It is
     not static: every state that uses it either finishes within a single
     call or starts from depth 0 on entry.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: create the initial spare node that terminates the
	 list.  */
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME rule guarantees a pair of double quotes around a
	 non-empty name, so both strchr() calls succeed.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);

      /* Skip the '#' and the first blank; atoi() skips any further
	 leading white space itself.  */
      cur_line = atoi(yytext+2);

      /* The first file named this way is taken to be the source file;
	 afterwards in_source_file tells whether we are back in it.  */
      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
	{
	case IDENT:
	  APP;
	  {
	    int r = is_reserved_word(yytext, yyleng);
	    if (r >= 0)
	      {
		switch (token = r)
		  {
		  /* These keywords open a phrase: keep scanning in the
		     matching state until its closing parenthesis.  */
		  case ATTRIBUTE_KEYW:
		    lexstate = ST_ATTRIBUTE;
		    count = 0;
		    goto repeat;
		  case ASM_KEYW:
		    lexstate = ST_ASM;
		    count = 0;
		    goto repeat;
		  case TYPEOF_KEYW:
		    lexstate = ST_TYPEOF;
		    count = 0;
		    goto repeat;

		  case STRUCT_KEYW:
		  case UNION_KEYW:
		  case ENUM_KEYW:
		    /* Both counters are decremented once at "fini" for
		       this keyword already.  The token that follows (the
		       tag, if any) is therefore not looked up as a
		       typedef name, and a '{' among the next two tokens
		       is returned as itself instead of starting a
		       BRACE_PHRASE.  */
		    dont_want_brace_phrase = 3;
		    suppress_type_lookup = 2;
		    goto fini;

		  case EXPORT_SYMBOL_KEYW:
		      goto fini;

		  case STATIC_ASSERT_KEYW:
		    lexstate = ST_STATIC_ASSERT;
		    count = 0;
		    goto repeat;
		  }
	      }
	    if (!suppress_type_lookup)
	      {
		if (find_symbol(yytext, SYM_TYPEDEF, 1))
		  token = TYPE;
	      }
	  }
	  break;

	case '[':
	  APP;
	  lexstate = ST_BRACKET;
	  count = 1;
	  goto repeat;

	case '{':
	  APP;
	  if (dont_want_brace_phrase)
	    break;
	  lexstate = ST_BRACE;
	  count = 1;
	  goto repeat;

	case '=': case ':':
	  /* The token itself is returned now; what follows it is
	     collected as an EXPRESSION_PHRASE by the next call.  */
	  APP;
	  lexstate = ST_EXPRESSION;
	  break;

	case DOTS:
	default:
	  APP;
	  break;
	}
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the parenthesis that balances the
	 first '('.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ATTRIBUTE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_ASM:
      /* Same scheme as ST_ATTRIBUTE, reported as ASM_PHRASE.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = ASM_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_TYPEOF_1:
      /* First token after the opening '(' of a typeof, which has not
	 been appended yet.  */
      if (token == IDENT)
	{
	  if (is_reserved_word(yytext, yyleng) >= 0
	      || find_symbol(yytext, SYM_TYPEDEF, 1))
	    {
	      /* The operand starts with a keyword or a typedef name:
		 push the identifier and the '(' back into the input and
		 return a plain TYPEOF_KEYW so that the operand is
		 scanned as ordinary tokens.  */
	      yyless(0);
	      unput('(');
	      lexstate = ST_NORMAL;
	      token = TYPEOF_KEYW;
	      break;
	    }
	  _APP("(", 1);
	}
      /* NOTE(review): when the first token is not an identifier, the
	 opening '(' is never appended to the list.  Left as is, since
	 changing the recorded text would presumably change the computed
	 checksums -- confirm before touching.  */
      lexstate = ST_TYPEOF;
      /* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
	{
	case '(':
	  /* The outermost '(' is held back until ST_TYPEOF_1 has looked
	     at the token that follows it.  */
	  if ( ++count == 1 )
	    lexstate = ST_TYPEOF_1;
	  else
	    APP;
	  goto repeat;
	case ')':
	  APP;
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = TYPEOF_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    case ST_BRACKET:
      /* Entered with count == 1 for the '[' already seen.  */
      APP;
      switch (token)
	{
	case '[':
	  ++count;
	  goto repeat;
	case ']':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACKET_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_BRACE:
      /* Entered with count == 1 for the '{' already seen.  */
      APP;
      switch (token)
	{
	case '{':
	  ++count;
	  goto repeat;
	case '}':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = BRACE_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    case ST_EXPRESSION:
      /* Collect tokens until a ',' , ';' or '}' is met outside of any
	 bracket, parenthesis or brace opened within the expression.  */
      switch (token)
	{
	case '(': case '[': case '{':
	  ++count;
	  APP;
	  goto repeat;
	case '}':
	  /* is this the last line of an enum declaration? */
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  /* FALLTHRU */
	case ')': case ']':
	  --count;
	  APP;
	  goto repeat;
	case ',': case ';':
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  APP;
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    case ST_STATIC_ASSERT:
      /* Same scheme as ST_ATTRIBUTE, reported as STATIC_ASSERT_PHRASE.  */
      APP;
      switch (token)
	{
	case '(':
	  ++count;
	  goto repeat;
	case ')':
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = STATIC_ASSERT_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  goto repeat;
	}
      break;

    default:
      /* Not reachable with the states defined above.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}