qemu/qobject/json-parser.c
<<
>>
Prefs
   1/*
   2 * JSON Parser
   3 *
   4 * Copyright IBM, Corp. 2009
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10 * See the COPYING.LIB file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/ctype.h"
  16#include "qemu/cutils.h"
  17#include "qemu/unicode.h"
  18#include "qapi/error.h"
  19#include "qapi/qmp/qbool.h"
  20#include "qapi/qmp/qdict.h"
  21#include "qapi/qmp/qlist.h"
  22#include "qapi/qmp/qnull.h"
  23#include "qapi/qmp/qnum.h"
  24#include "qapi/qmp/qstring.h"
  25#include "json-parser-int.h"
  26
/*
 * One lexical token handed to the parser.  Instances are created by
 * json_token(), which allocates the structure together with room for
 * the token text.
 */
struct JSONToken {
    JSONTokenType type;     /* kind of token; selects the parse routine */
    int x;                  /* copied from json_token()'s @x; presumably a
                             * position in the input -- TODO confirm */
    int y;                  /* copied from json_token()'s @y; see @x */
    char str[];             /* NUL-terminated copy of the token text */
};
  33
/*
 * State shared by all parse routines for one json_parser_parse() call.
 */
typedef struct JSONParserContext {
    Error *err;             /* first error reported via parse_error(), or NULL */
    JSONToken *current;     /* token most recently popped; freed on next pop */
    GQueue *buf;            /* tokens not yet consumed, head is next */
    va_list *ap;            /* arguments for %-interpolation; parse_string()
                             * only restricts '%' when this is non-NULL */
} JSONParserContext;
  40
/* Abort (via assert) when @cond is true. */
#define BUG_ON(cond) assert(!(cond))
  42
  43/**
  44 * TODO
  45 *
  46 * 0) make errors meaningful again
  47 * 1) add geometry information to tokens
  48 * 3) should we return a parsed size?
  49 * 4) deal with premature EOI
  50 */
  51
/*
 * Forward declaration: parse_pair() and parse_array() call parse_value(),
 * which in turn dispatches back to parse_object() and parse_array().
 */
static QObject *parse_value(JSONParserContext *ctxt);
  53
  54/**
  55 * Error handler
  56 */
  57static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
  58                                           JSONToken *token, const char *msg, ...)
  59{
  60    va_list ap;
  61    char message[1024];
  62
  63    if (ctxt->err) {
  64        return;
  65    }
  66    va_start(ap, msg);
  67    vsnprintf(message, sizeof(message), msg, ap);
  68    va_end(ap);
  69    error_setg(&ctxt->err, "JSON parse error, %s", message);
  70}
  71
  72static int cvt4hex(const char *s)
  73{
  74    int cp, i;
  75
  76    cp = 0;
  77    for (i = 0; i < 4; i++) {
  78        if (!qemu_isxdigit(s[i])) {
  79            return -1;
  80        }
  81        cp <<= 4;
  82        if (s[i] >= '0' && s[i] <= '9') {
  83            cp |= s[i] - '0';
  84        } else if (s[i] >= 'a' && s[i] <= 'f') {
  85            cp |= 10 + s[i] - 'a';
  86        } else if (s[i] >= 'A' && s[i] <= 'F') {
  87            cp |= 10 + s[i] - 'A';
  88        } else {
  89            return -1;
  90        }
  91    }
  92    return cp;
  93}
  94
  95/**
  96 * parse_string(): Parse a JSON string
  97 *
  98 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
  99 * Interchange Format":
 100 *
 101 *    char = unescaped /
 102 *        escape (
 103 *            %x22 /          ; "    quotation mark  U+0022
 104 *            %x5C /          ; \    reverse solidus U+005C
 105 *            %x2F /          ; /    solidus         U+002F
 106 *            %x62 /          ; b    backspace       U+0008
 107 *            %x66 /          ; f    form feed       U+000C
 108 *            %x6E /          ; n    line feed       U+000A
 109 *            %x72 /          ; r    carriage return U+000D
 110 *            %x74 /          ; t    tab             U+0009
 111 *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
 112 *    escape = %x5C              ; \
 113 *    quotation-mark = %x22      ; "
 114 *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
 115 *
 116 * Extensions over RFC 8259:
 117 * - Extra escape sequence in strings:
 118 *   0x27 (apostrophe) is recognized after escape, too
 119 * - Single-quoted strings:
 120 *   Like double-quoted strings, except they're delimited by %x27
 121 *   (apostrophe) instead of %x22 (quotation mark), and can't contain
 122 *   unescaped apostrophe, but can contain unescaped quotation mark.
 123 *
 124 * Note:
 125 * - Encoding is modified UTF-8.
 126 * - Invalid Unicode characters are rejected.
 127 * - Control characters \x00..\x1F are rejected by the lexer.
 128 */
 129static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 130{
 131    const char *ptr = token->str;
 132    GString *str;
 133    char quote;
 134    const char *beg;
 135    int cp, trailing;
 136    char *end;
 137    ssize_t len;
 138    char utf8_buf[5];
 139
 140    assert(*ptr == '"' || *ptr == '\'');
 141    quote = *ptr++;
 142    str = g_string_new(NULL);
 143
 144    while (*ptr != quote) {
 145        assert(*ptr);
 146        switch (*ptr) {
 147        case '\\':
 148            beg = ptr++;
 149            switch (*ptr++) {
 150            case '"':
 151                g_string_append_c(str, '"');
 152                break;
 153            case '\'':
 154                g_string_append_c(str, '\'');
 155                break;
 156            case '\\':
 157                g_string_append_c(str, '\\');
 158                break;
 159            case '/':
 160                g_string_append_c(str, '/');
 161                break;
 162            case 'b':
 163                g_string_append_c(str, '\b');
 164                break;
 165            case 'f':
 166                g_string_append_c(str, '\f');
 167                break;
 168            case 'n':
 169                g_string_append_c(str, '\n');
 170                break;
 171            case 'r':
 172                g_string_append_c(str, '\r');
 173                break;
 174            case 't':
 175                g_string_append_c(str, '\t');
 176                break;
 177            case 'u':
 178                cp = cvt4hex(ptr);
 179                ptr += 4;
 180
 181                /* handle surrogate pairs */
 182                if (cp >= 0xD800 && cp <= 0xDBFF
 183                    && ptr[0] == '\\' && ptr[1] == 'u') {
 184                    /* leading surrogate followed by \u */
 185                    cp = 0x10000 + ((cp & 0x3FF) << 10);
 186                    trailing = cvt4hex(ptr + 2);
 187                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
 188                        /* followed by trailing surrogate */
 189                        cp |= trailing & 0x3FF;
 190                        ptr += 6;
 191                    } else {
 192                        cp = -1; /* invalid */
 193                    }
 194                }
 195
 196                if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
 197                    parse_error(ctxt, token,
 198                                "%.*s is not a valid Unicode character",
 199                                (int)(ptr - beg), beg);
 200                    goto out;
 201                }
 202                g_string_append(str, utf8_buf);
 203                break;
 204            default:
 205                parse_error(ctxt, token, "invalid escape sequence in string");
 206                goto out;
 207            }
 208            break;
 209        case '%':
 210            if (ctxt->ap) {
 211                if (ptr[1] != '%') {
 212                    parse_error(ctxt, token, "can't interpolate into string");
 213                    goto out;
 214                }
 215                ptr++;
 216            }
 217            /* fall through */
 218        default:
 219            cp = mod_utf8_codepoint(ptr, 6, &end);
 220            if (cp < 0) {
 221                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
 222                goto out;
 223            }
 224            ptr = end;
 225            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
 226            assert(len >= 0);
 227            g_string_append(str, utf8_buf);
 228        }
 229    }
 230
 231    return qstring_from_gstring(str);
 232
 233out:
 234    g_string_free(str, true);
 235    return NULL;
 236}
 237
 238/* Note: the token object returned by parser_context_peek_token or
 239 * parser_context_pop_token is deleted as soon as parser_context_pop_token
 240 * is called again.
 241 */
 242static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
 243{
 244    g_free(ctxt->current);
 245    ctxt->current = g_queue_pop_head(ctxt->buf);
 246    return ctxt->current;
 247}
 248
 249static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
 250{
 251    return g_queue_peek_head(ctxt->buf);
 252}
 253
 254/**
 255 * Parsing rules
 256 */
 257static int parse_pair(JSONParserContext *ctxt, QDict *dict)
 258{
 259    QObject *key_obj = NULL;
 260    QString *key;
 261    QObject *value;
 262    JSONToken *peek, *token;
 263
 264    peek = parser_context_peek_token(ctxt);
 265    if (peek == NULL) {
 266        parse_error(ctxt, NULL, "premature EOI");
 267        goto out;
 268    }
 269
 270    key_obj = parse_value(ctxt);
 271    key = qobject_to(QString, key_obj);
 272    if (!key) {
 273        parse_error(ctxt, peek, "key is not a string in object");
 274        goto out;
 275    }
 276
 277    token = parser_context_pop_token(ctxt);
 278    if (token == NULL) {
 279        parse_error(ctxt, NULL, "premature EOI");
 280        goto out;
 281    }
 282
 283    if (token->type != JSON_COLON) {
 284        parse_error(ctxt, token, "missing : in object pair");
 285        goto out;
 286    }
 287
 288    value = parse_value(ctxt);
 289    if (value == NULL) {
 290        parse_error(ctxt, token, "Missing value in dict");
 291        goto out;
 292    }
 293
 294    if (qdict_haskey(dict, qstring_get_str(key))) {
 295        parse_error(ctxt, token, "duplicate key");
 296        goto out;
 297    }
 298
 299    qdict_put_obj(dict, qstring_get_str(key), value);
 300
 301    qobject_unref(key_obj);
 302    return 0;
 303
 304out:
 305    qobject_unref(key_obj);
 306    return -1;
 307}
 308
 309static QObject *parse_object(JSONParserContext *ctxt)
 310{
 311    QDict *dict = NULL;
 312    JSONToken *token, *peek;
 313
 314    token = parser_context_pop_token(ctxt);
 315    assert(token && token->type == JSON_LCURLY);
 316
 317    dict = qdict_new();
 318
 319    peek = parser_context_peek_token(ctxt);
 320    if (peek == NULL) {
 321        parse_error(ctxt, NULL, "premature EOI");
 322        goto out;
 323    }
 324
 325    if (peek->type != JSON_RCURLY) {
 326        if (parse_pair(ctxt, dict) == -1) {
 327            goto out;
 328        }
 329
 330        token = parser_context_pop_token(ctxt);
 331        if (token == NULL) {
 332            parse_error(ctxt, NULL, "premature EOI");
 333            goto out;
 334        }
 335
 336        while (token->type != JSON_RCURLY) {
 337            if (token->type != JSON_COMMA) {
 338                parse_error(ctxt, token, "expected separator in dict");
 339                goto out;
 340            }
 341
 342            if (parse_pair(ctxt, dict) == -1) {
 343                goto out;
 344            }
 345
 346            token = parser_context_pop_token(ctxt);
 347            if (token == NULL) {
 348                parse_error(ctxt, NULL, "premature EOI");
 349                goto out;
 350            }
 351        }
 352    } else {
 353        (void)parser_context_pop_token(ctxt);
 354    }
 355
 356    return QOBJECT(dict);
 357
 358out:
 359    qobject_unref(dict);
 360    return NULL;
 361}
 362
 363static QObject *parse_array(JSONParserContext *ctxt)
 364{
 365    QList *list = NULL;
 366    JSONToken *token, *peek;
 367
 368    token = parser_context_pop_token(ctxt);
 369    assert(token && token->type == JSON_LSQUARE);
 370
 371    list = qlist_new();
 372
 373    peek = parser_context_peek_token(ctxt);
 374    if (peek == NULL) {
 375        parse_error(ctxt, NULL, "premature EOI");
 376        goto out;
 377    }
 378
 379    if (peek->type != JSON_RSQUARE) {
 380        QObject *obj;
 381
 382        obj = parse_value(ctxt);
 383        if (obj == NULL) {
 384            parse_error(ctxt, token, "expecting value");
 385            goto out;
 386        }
 387
 388        qlist_append_obj(list, obj);
 389
 390        token = parser_context_pop_token(ctxt);
 391        if (token == NULL) {
 392            parse_error(ctxt, NULL, "premature EOI");
 393            goto out;
 394        }
 395
 396        while (token->type != JSON_RSQUARE) {
 397            if (token->type != JSON_COMMA) {
 398                parse_error(ctxt, token, "expected separator in list");
 399                goto out;
 400            }
 401
 402            obj = parse_value(ctxt);
 403            if (obj == NULL) {
 404                parse_error(ctxt, token, "expecting value");
 405                goto out;
 406            }
 407
 408            qlist_append_obj(list, obj);
 409
 410            token = parser_context_pop_token(ctxt);
 411            if (token == NULL) {
 412                parse_error(ctxt, NULL, "premature EOI");
 413                goto out;
 414            }
 415        }
 416    } else {
 417        (void)parser_context_pop_token(ctxt);
 418    }
 419
 420    return QOBJECT(list);
 421
 422out:
 423    qobject_unref(list);
 424    return NULL;
 425}
 426
 427static QObject *parse_keyword(JSONParserContext *ctxt)
 428{
 429    JSONToken *token;
 430
 431    token = parser_context_pop_token(ctxt);
 432    assert(token && token->type == JSON_KEYWORD);
 433
 434    if (!strcmp(token->str, "true")) {
 435        return QOBJECT(qbool_from_bool(true));
 436    } else if (!strcmp(token->str, "false")) {
 437        return QOBJECT(qbool_from_bool(false));
 438    } else if (!strcmp(token->str, "null")) {
 439        return QOBJECT(qnull());
 440    }
 441    parse_error(ctxt, token, "invalid keyword '%s'", token->str);
 442    return NULL;
 443}
 444
 445static QObject *parse_interpolation(JSONParserContext *ctxt)
 446{
 447    JSONToken *token;
 448
 449    token = parser_context_pop_token(ctxt);
 450    assert(token && token->type == JSON_INTERP);
 451
 452    if (!strcmp(token->str, "%p")) {
 453        return va_arg(*ctxt->ap, QObject *);
 454    } else if (!strcmp(token->str, "%i")) {
 455        return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
 456    } else if (!strcmp(token->str, "%d")) {
 457        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
 458    } else if (!strcmp(token->str, "%ld")) {
 459        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
 460    } else if (!strcmp(token->str, "%lld")) {
 461        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
 462    } else if (!strcmp(token->str, "%" PRId64)) {
 463        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
 464    } else if (!strcmp(token->str, "%u")) {
 465        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
 466    } else if (!strcmp(token->str, "%lu")) {
 467        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
 468    } else if (!strcmp(token->str, "%llu")) {
 469        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
 470    } else if (!strcmp(token->str, "%" PRIu64)) {
 471        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
 472    } else if (!strcmp(token->str, "%s")) {
 473        return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
 474    } else if (!strcmp(token->str, "%f")) {
 475        return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
 476    }
 477    parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
 478    return NULL;
 479}
 480
 481static QObject *parse_literal(JSONParserContext *ctxt)
 482{
 483    JSONToken *token;
 484
 485    token = parser_context_pop_token(ctxt);
 486    assert(token);
 487
 488    switch (token->type) {
 489    case JSON_STRING:
 490        return QOBJECT(parse_string(ctxt, token));
 491    case JSON_INTEGER: {
 492        /*
 493         * Represent JSON_INTEGER as QNUM_I64 if possible, else as
 494         * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
 495         * and qemu_strtou64() fail with ERANGE when it's not
 496         * possible.
 497         *
 498         * qnum_get_int() will then work for any signed 64-bit
 499         * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
 500         * integer, and qnum_get_double() both for any JSON_INTEGER
 501         * and any JSON_FLOAT (with precision loss for integers beyond
 502         * 53 bits)
 503         */
 504        int ret;
 505        int64_t value;
 506        uint64_t uvalue;
 507
 508        ret = qemu_strtoi64(token->str, NULL, 10, &value);
 509        if (!ret) {
 510            return QOBJECT(qnum_from_int(value));
 511        }
 512        assert(ret == -ERANGE);
 513
 514        if (token->str[0] != '-') {
 515            ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
 516            if (!ret) {
 517                return QOBJECT(qnum_from_uint(uvalue));
 518            }
 519            assert(ret == -ERANGE);
 520        }
 521    }
 522    /* fall through to JSON_FLOAT */
 523    case JSON_FLOAT:
 524        /* FIXME dependent on locale; a pervasive issue in QEMU */
 525        /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
 526         * but those might be useful extensions beyond JSON */
 527        return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
 528    default:
 529        abort();
 530    }
 531}
 532
 533static QObject *parse_value(JSONParserContext *ctxt)
 534{
 535    JSONToken *token;
 536
 537    token = parser_context_peek_token(ctxt);
 538    if (token == NULL) {
 539        parse_error(ctxt, NULL, "premature EOI");
 540        return NULL;
 541    }
 542
 543    switch (token->type) {
 544    case JSON_LCURLY:
 545        return parse_object(ctxt);
 546    case JSON_LSQUARE:
 547        return parse_array(ctxt);
 548    case JSON_INTERP:
 549        return parse_interpolation(ctxt);
 550    case JSON_INTEGER:
 551    case JSON_FLOAT:
 552    case JSON_STRING:
 553        return parse_literal(ctxt);
 554    case JSON_KEYWORD:
 555        return parse_keyword(ctxt);
 556    default:
 557        parse_error(ctxt, token, "expecting value");
 558        return NULL;
 559    }
 560}
 561
 562JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
 563{
 564    JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
 565
 566    token->type = type;
 567    memcpy(token->str, tokstr->str, tokstr->len);
 568    token->str[tokstr->len] = 0;
 569    token->x = x;
 570    token->y = y;
 571    return token;
 572}
 573
 574QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
 575{
 576    JSONParserContext ctxt = { .buf = tokens, .ap = ap };
 577    QObject *result;
 578
 579    result = parse_value(&ctxt);
 580    assert(ctxt.err || g_queue_is_empty(ctxt.buf));
 581
 582    error_propagate(errp, ctxt.err);
 583
 584    while (!g_queue_is_empty(ctxt.buf)) {
 585        parser_context_pop_token(&ctxt);
 586    }
 587    g_free(ctxt.current);
 588
 589    return result;
 590}
 591