qemu/json-lexer.c
<<
>>
Prefs
   1/*
   2 * JSON lexer
   3 *
   4 * Copyright IBM, Corp. 2009
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10 * See the COPYING.LIB file in the top-level directory.
  11 *
  12 */
  13
  14#include "qstring.h"
  15#include "qlist.h"
  16#include "qdict.h"
  17#include "qint.h"
  18#include "qemu-common.h"
  19#include "json-lexer.h"
  20
  21/*
  22 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
  23 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
  24 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
  25 * [{}\[\],:]
  26 * [a-z]+
  27 *
  28 */
  29
  30enum json_lexer_state {
  31    ERROR = 0,
  32    IN_DQ_UCODE3,
  33    IN_DQ_UCODE2,
  34    IN_DQ_UCODE1,
  35    IN_DQ_UCODE0,
  36    IN_DQ_STRING_ESCAPE,
  37    IN_DQ_STRING,
  38    IN_SQ_UCODE3,
  39    IN_SQ_UCODE2,
  40    IN_SQ_UCODE1,
  41    IN_SQ_UCODE0,
  42    IN_SQ_STRING_ESCAPE,
  43    IN_SQ_STRING,
  44    IN_ZERO,
  45    IN_DIGITS,
  46    IN_DIGIT,
  47    IN_EXP_E,
  48    IN_MANTISSA,
  49    IN_MANTISSA_DIGITS,
  50    IN_NONZERO_NUMBER,
  51    IN_NEG_NONZERO_NUMBER,
  52    IN_KEYWORD,
  53    IN_ESCAPE,
  54    IN_ESCAPE_L,
  55    IN_ESCAPE_LL,
  56    IN_ESCAPE_I,
  57    IN_ESCAPE_I6,
  58    IN_ESCAPE_I64,
  59    IN_WHITESPACE,
  60    IN_START,
  61};
  62
  63#define TERMINAL(state) [0 ... 0x7F] = (state)
  64
  65/* Return whether TERMINAL is a terminal state and the transition to it
  66   from OLD_STATE required lookahead.  This happens whenever the table
  67   below uses the TERMINAL macro.  */
  68#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
  69            (json_lexer[(old_state)][0] == (terminal))
  70
  71static const uint8_t json_lexer[][256] =  {
  72    /* double quote string */
  73    [IN_DQ_UCODE3] = {
  74        ['0' ... '9'] = IN_DQ_STRING,
  75        ['a' ... 'f'] = IN_DQ_STRING,
  76        ['A' ... 'F'] = IN_DQ_STRING,
  77    },
  78    [IN_DQ_UCODE2] = {
  79        ['0' ... '9'] = IN_DQ_UCODE3,
  80        ['a' ... 'f'] = IN_DQ_UCODE3,
  81        ['A' ... 'F'] = IN_DQ_UCODE3,
  82    },
  83    [IN_DQ_UCODE1] = {
  84        ['0' ... '9'] = IN_DQ_UCODE2,
  85        ['a' ... 'f'] = IN_DQ_UCODE2,
  86        ['A' ... 'F'] = IN_DQ_UCODE2,
  87    },
  88    [IN_DQ_UCODE0] = {
  89        ['0' ... '9'] = IN_DQ_UCODE1,
  90        ['a' ... 'f'] = IN_DQ_UCODE1,
  91        ['A' ... 'F'] = IN_DQ_UCODE1,
  92    },
  93    [IN_DQ_STRING_ESCAPE] = {
  94        ['b'] = IN_DQ_STRING,
  95        ['f'] =  IN_DQ_STRING,
  96        ['n'] =  IN_DQ_STRING,
  97        ['r'] =  IN_DQ_STRING,
  98        ['t'] =  IN_DQ_STRING,
  99        ['/'] = IN_DQ_STRING,
 100        ['\\'] = IN_DQ_STRING,
 101        ['\''] = IN_DQ_STRING,
 102        ['\"'] = IN_DQ_STRING,
 103        ['u'] = IN_DQ_UCODE0,
 104    },
 105    [IN_DQ_STRING] = {
 106        [1 ... 0xFF] = IN_DQ_STRING,
 107        ['\\'] = IN_DQ_STRING_ESCAPE,
 108        ['"'] = JSON_STRING,
 109    },
 110
 111    /* single quote string */
 112    [IN_SQ_UCODE3] = {
 113        ['0' ... '9'] = IN_SQ_STRING,
 114        ['a' ... 'f'] = IN_SQ_STRING,
 115        ['A' ... 'F'] = IN_SQ_STRING,
 116    },
 117    [IN_SQ_UCODE2] = {
 118        ['0' ... '9'] = IN_SQ_UCODE3,
 119        ['a' ... 'f'] = IN_SQ_UCODE3,
 120        ['A' ... 'F'] = IN_SQ_UCODE3,
 121    },
 122    [IN_SQ_UCODE1] = {
 123        ['0' ... '9'] = IN_SQ_UCODE2,
 124        ['a' ... 'f'] = IN_SQ_UCODE2,
 125        ['A' ... 'F'] = IN_SQ_UCODE2,
 126    },
 127    [IN_SQ_UCODE0] = {
 128        ['0' ... '9'] = IN_SQ_UCODE1,
 129        ['a' ... 'f'] = IN_SQ_UCODE1,
 130        ['A' ... 'F'] = IN_SQ_UCODE1,
 131    },
 132    [IN_SQ_STRING_ESCAPE] = {
 133        ['b'] = IN_SQ_STRING,
 134        ['f'] =  IN_SQ_STRING,
 135        ['n'] =  IN_SQ_STRING,
 136        ['r'] =  IN_SQ_STRING,
 137        ['t'] =  IN_SQ_STRING,
 138        ['/'] = IN_DQ_STRING,
 139        ['\\'] = IN_DQ_STRING,
 140        ['\''] = IN_SQ_STRING,
 141        ['\"'] = IN_SQ_STRING,
 142        ['u'] = IN_SQ_UCODE0,
 143    },
 144    [IN_SQ_STRING] = {
 145        [1 ... 0xFF] = IN_SQ_STRING,
 146        ['\\'] = IN_SQ_STRING_ESCAPE,
 147        ['\''] = JSON_STRING,
 148    },
 149
 150    /* Zero */
 151    [IN_ZERO] = {
 152        TERMINAL(JSON_INTEGER),
 153        ['0' ... '9'] = ERROR,
 154        ['.'] = IN_MANTISSA,
 155    },
 156
 157    /* Float */
 158    [IN_DIGITS] = {
 159        TERMINAL(JSON_FLOAT),
 160        ['0' ... '9'] = IN_DIGITS,
 161    },
 162
 163    [IN_DIGIT] = {
 164        ['0' ... '9'] = IN_DIGITS,
 165    },
 166
 167    [IN_EXP_E] = {
 168        ['-'] = IN_DIGIT,
 169        ['+'] = IN_DIGIT,
 170        ['0' ... '9'] = IN_DIGITS,
 171    },
 172
 173    [IN_MANTISSA_DIGITS] = {
 174        TERMINAL(JSON_FLOAT),
 175        ['0' ... '9'] = IN_MANTISSA_DIGITS,
 176        ['e'] = IN_EXP_E,
 177        ['E'] = IN_EXP_E,
 178    },
 179
 180    [IN_MANTISSA] = {
 181        ['0' ... '9'] = IN_MANTISSA_DIGITS,
 182    },
 183
 184    /* Number */
 185    [IN_NONZERO_NUMBER] = {
 186        TERMINAL(JSON_INTEGER),
 187        ['0' ... '9'] = IN_NONZERO_NUMBER,
 188        ['e'] = IN_EXP_E,
 189        ['E'] = IN_EXP_E,
 190        ['.'] = IN_MANTISSA,
 191    },
 192
 193    [IN_NEG_NONZERO_NUMBER] = {
 194        ['0'] = IN_ZERO,
 195        ['1' ... '9'] = IN_NONZERO_NUMBER,
 196    },
 197
 198    /* keywords */
 199    [IN_KEYWORD] = {
 200        TERMINAL(JSON_KEYWORD),
 201        ['a' ... 'z'] = IN_KEYWORD,
 202    },
 203
 204    /* whitespace */
 205    [IN_WHITESPACE] = {
 206        TERMINAL(JSON_SKIP),
 207        [' '] = IN_WHITESPACE,
 208        ['\t'] = IN_WHITESPACE,
 209        ['\r'] = IN_WHITESPACE,
 210        ['\n'] = IN_WHITESPACE,
 211    },        
 212
 213    /* escape */
 214    [IN_ESCAPE_LL] = {
 215        ['d'] = JSON_ESCAPE,
 216    },
 217
 218    [IN_ESCAPE_L] = {
 219        ['d'] = JSON_ESCAPE,
 220        ['l'] = IN_ESCAPE_LL,
 221    },
 222
 223    [IN_ESCAPE_I64] = {
 224        ['d'] = JSON_ESCAPE,
 225    },
 226
 227    [IN_ESCAPE_I6] = {
 228        ['4'] = IN_ESCAPE_I64,
 229    },
 230
 231    [IN_ESCAPE_I] = {
 232        ['6'] = IN_ESCAPE_I6,
 233    },
 234
 235    [IN_ESCAPE] = {
 236        ['d'] = JSON_ESCAPE,
 237        ['i'] = JSON_ESCAPE,
 238        ['p'] = JSON_ESCAPE,
 239        ['s'] = JSON_ESCAPE,
 240        ['f'] = JSON_ESCAPE,
 241        ['l'] = IN_ESCAPE_L,
 242        ['I'] = IN_ESCAPE_I,
 243    },
 244
 245    /* top level rule */
 246    [IN_START] = {
 247        ['"'] = IN_DQ_STRING,
 248        ['\''] = IN_SQ_STRING,
 249        ['0'] = IN_ZERO,
 250        ['1' ... '9'] = IN_NONZERO_NUMBER,
 251        ['-'] = IN_NEG_NONZERO_NUMBER,
 252        ['{'] = JSON_OPERATOR,
 253        ['}'] = JSON_OPERATOR,
 254        ['['] = JSON_OPERATOR,
 255        [']'] = JSON_OPERATOR,
 256        [','] = JSON_OPERATOR,
 257        [':'] = JSON_OPERATOR,
 258        ['a' ... 'z'] = IN_KEYWORD,
 259        ['%'] = IN_ESCAPE,
 260        [' '] = IN_WHITESPACE,
 261        ['\t'] = IN_WHITESPACE,
 262        ['\r'] = IN_WHITESPACE,
 263        ['\n'] = IN_WHITESPACE,
 264    },
 265};
 266
 267void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
 268{
 269    lexer->emit = func;
 270    lexer->state = IN_START;
 271    lexer->token = qstring_new();
 272    lexer->x = lexer->y = 0;
 273}
 274
 275static int json_lexer_feed_char(JSONLexer *lexer, char ch)
 276{
 277    int char_consumed, new_state;
 278
 279    lexer->x++;
 280    if (ch == '\n') {
 281        lexer->x = 0;
 282        lexer->y++;
 283    }
 284
 285    do {
 286        new_state = json_lexer[lexer->state][(uint8_t)ch];
 287        char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
 288        if (char_consumed) {
 289            qstring_append_chr(lexer->token, ch);
 290        }
 291
 292        switch (new_state) {
 293        case JSON_OPERATOR:
 294        case JSON_ESCAPE:
 295        case JSON_INTEGER:
 296        case JSON_FLOAT:
 297        case JSON_KEYWORD:
 298        case JSON_STRING:
 299            lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
 300        case JSON_SKIP:
 301            QDECREF(lexer->token);
 302            lexer->token = qstring_new();
 303            new_state = IN_START;
 304            break;
 305        case ERROR:
 306            return -EINVAL;
 307        default:
 308            break;
 309        }
 310        lexer->state = new_state;
 311    } while (!char_consumed);
 312    return 0;
 313}
 314
 315int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
 316{
 317    size_t i;
 318
 319    for (i = 0; i < size; i++) {
 320        int err;
 321
 322        err = json_lexer_feed_char(lexer, buffer[i]);
 323        if (err < 0) {
 324            return err;
 325        }
 326    }
 327
 328    return 0;
 329}
 330
 331int json_lexer_flush(JSONLexer *lexer)
 332{
 333    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0);
 334}
 335
 336void json_lexer_destroy(JSONLexer *lexer)
 337{
 338    QDECREF(lexer->token);
 339}
 340