qemu/qobject/json-parser.c
<<
>>
Prefs
   1/*
   2 * JSON Parser
   3 *
   4 * Copyright IBM, Corp. 2009
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10 * See the COPYING.LIB file in the top-level directory.
  11 *
  12 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/cutils.h"
  16#include "qemu/unicode.h"
  17#include "qapi/error.h"
  18#include "qemu-common.h"
  19#include "qapi/qmp/qbool.h"
  20#include "qapi/qmp/qdict.h"
  21#include "qapi/qmp/qlist.h"
  22#include "qapi/qmp/qnull.h"
  23#include "qapi/qmp/qnum.h"
  24#include "qapi/qmp/qstring.h"
  25#include "json-parser-int.h"
  26
  27struct JSONToken {
  28    JSONTokenType type;
  29    int x;
  30    int y;
  31    char str[];
  32};
  33
  34typedef struct JSONParserContext
  35{
  36    Error *err;
  37    JSONToken *current;
  38    GQueue *buf;
  39    va_list *ap;
  40} JSONParserContext;
  41
/* Assert that @cond does NOT hold; a thin wrapper around assert(). */
#define BUG_ON(cond) assert(!(cond))
  43
  44/**
  45 * TODO
  46 *
  47 * 0) make errors meaningful again
  48 * 1) add geometry information to tokens
  49 * 3) should we return a parsed size?
  50 * 4) deal with premature EOI
  51 */
  52
  53static QObject *parse_value(JSONParserContext *ctxt);
  54
  55/**
  56 * Error handler
  57 */
  58static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
  59                                           JSONToken *token, const char *msg, ...)
  60{
  61    va_list ap;
  62    char message[1024];
  63
  64    if (ctxt->err) {
  65        return;
  66    }
  67    va_start(ap, msg);
  68    vsnprintf(message, sizeof(message), msg, ap);
  69    va_end(ap);
  70    error_setg(&ctxt->err, "JSON parse error, %s", message);
  71}
  72
  73static int cvt4hex(const char *s)
  74{
  75    int cp, i;
  76
  77    cp = 0;
  78    for (i = 0; i < 4; i++) {
  79        if (!qemu_isxdigit(s[i])) {
  80            return -1;
  81        }
  82        cp <<= 4;
  83        if (s[i] >= '0' && s[i] <= '9') {
  84            cp |= s[i] - '0';
  85        } else if (s[i] >= 'a' && s[i] <= 'f') {
  86            cp |= 10 + s[i] - 'a';
  87        } else if (s[i] >= 'A' && s[i] <= 'F') {
  88            cp |= 10 + s[i] - 'A';
  89        } else {
  90            return -1;
  91        }
  92    }
  93    return cp;
  94}
  95
  96/**
  97 * parse_string(): Parse a JSON string
  98 *
  99 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
 100 * Interchange Format":
 101 *
 102 *    char = unescaped /
 103 *        escape (
 104 *            %x22 /          ; "    quotation mark  U+0022
 105 *            %x5C /          ; \    reverse solidus U+005C
 106 *            %x2F /          ; /    solidus         U+002F
 107 *            %x62 /          ; b    backspace       U+0008
 108 *            %x66 /          ; f    form feed       U+000C
 109 *            %x6E /          ; n    line feed       U+000A
 110 *            %x72 /          ; r    carriage return U+000D
 111 *            %x74 /          ; t    tab             U+0009
 112 *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
 113 *    escape = %x5C              ; \
 114 *    quotation-mark = %x22      ; "
 115 *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
 116 *
 117 * Extensions over RFC 8259:
 118 * - Extra escape sequence in strings:
 119 *   0x27 (apostrophe) is recognized after escape, too
 120 * - Single-quoted strings:
 121 *   Like double-quoted strings, except they're delimited by %x27
 122 *   (apostrophe) instead of %x22 (quotation mark), and can't contain
 123 *   unescaped apostrophe, but can contain unescaped quotation mark.
 124 *
 125 * Note:
 126 * - Encoding is modified UTF-8.
 127 * - Invalid Unicode characters are rejected.
 128 * - Control characters \x00..\x1F are rejected by the lexer.
 129 */
 130static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 131{
 132    const char *ptr = token->str;
 133    QString *str;
 134    char quote;
 135    const char *beg;
 136    int cp, trailing;
 137    char *end;
 138    ssize_t len;
 139    char utf8_buf[5];
 140
 141    assert(*ptr == '"' || *ptr == '\'');
 142    quote = *ptr++;
 143    str = qstring_new();
 144
 145    while (*ptr != quote) {
 146        assert(*ptr);
 147        switch (*ptr) {
 148        case '\\':
 149            beg = ptr++;
 150            switch (*ptr++) {
 151            case '"':
 152                qstring_append_chr(str, '"');
 153                break;
 154            case '\'':
 155                qstring_append_chr(str, '\'');
 156                break;
 157            case '\\':
 158                qstring_append_chr(str, '\\');
 159                break;
 160            case '/':
 161                qstring_append_chr(str, '/');
 162                break;
 163            case 'b':
 164                qstring_append_chr(str, '\b');
 165                break;
 166            case 'f':
 167                qstring_append_chr(str, '\f');
 168                break;
 169            case 'n':
 170                qstring_append_chr(str, '\n');
 171                break;
 172            case 'r':
 173                qstring_append_chr(str, '\r');
 174                break;
 175            case 't':
 176                qstring_append_chr(str, '\t');
 177                break;
 178            case 'u':
 179                cp = cvt4hex(ptr);
 180                ptr += 4;
 181
 182                /* handle surrogate pairs */
 183                if (cp >= 0xD800 && cp <= 0xDBFF
 184                    && ptr[0] == '\\' && ptr[1] == 'u') {
 185                    /* leading surrogate followed by \u */
 186                    cp = 0x10000 + ((cp & 0x3FF) << 10);
 187                    trailing = cvt4hex(ptr + 2);
 188                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
 189                        /* followed by trailing surrogate */
 190                        cp |= trailing & 0x3FF;
 191                        ptr += 6;
 192                    } else {
 193                        cp = -1; /* invalid */
 194                    }
 195                }
 196
 197                if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
 198                    parse_error(ctxt, token,
 199                                "%.*s is not a valid Unicode character",
 200                                (int)(ptr - beg), beg);
 201                    goto out;
 202                }
 203                qstring_append(str, utf8_buf);
 204                break;
 205            default:
 206                parse_error(ctxt, token, "invalid escape sequence in string");
 207                goto out;
 208            }
 209            break;
 210        case '%':
 211            if (ctxt->ap) {
 212                if (ptr[1] != '%') {
 213                    parse_error(ctxt, token, "can't interpolate into string");
 214                    goto out;
 215                }
 216                ptr++;
 217            }
 218            /* fall through */
 219        default:
 220            cp = mod_utf8_codepoint(ptr, 6, &end);
 221            if (cp < 0) {
 222                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
 223                goto out;
 224            }
 225            ptr = end;
 226            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
 227            assert(len >= 0);
 228            qstring_append(str, utf8_buf);
 229        }
 230    }
 231
 232    return str;
 233
 234out:
 235    qobject_unref(str);
 236    return NULL;
 237}
 238
 239/* Note: the token object returned by parser_context_peek_token or
 240 * parser_context_pop_token is deleted as soon as parser_context_pop_token
 241 * is called again.
 242 */
 243static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
 244{
 245    g_free(ctxt->current);
 246    ctxt->current = g_queue_pop_head(ctxt->buf);
 247    return ctxt->current;
 248}
 249
 250static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
 251{
 252    return g_queue_peek_head(ctxt->buf);
 253}
 254
 255/**
 256 * Parsing rules
 257 */
 258static int parse_pair(JSONParserContext *ctxt, QDict *dict)
 259{
 260    QObject *value;
 261    QString *key = NULL;
 262    JSONToken *peek, *token;
 263
 264    peek = parser_context_peek_token(ctxt);
 265    if (peek == NULL) {
 266        parse_error(ctxt, NULL, "premature EOI");
 267        goto out;
 268    }
 269
 270    key = qobject_to(QString, parse_value(ctxt));
 271    if (!key) {
 272        parse_error(ctxt, peek, "key is not a string in object");
 273        goto out;
 274    }
 275
 276    token = parser_context_pop_token(ctxt);
 277    if (token == NULL) {
 278        parse_error(ctxt, NULL, "premature EOI");
 279        goto out;
 280    }
 281
 282    if (token->type != JSON_COLON) {
 283        parse_error(ctxt, token, "missing : in object pair");
 284        goto out;
 285    }
 286
 287    value = parse_value(ctxt);
 288    if (value == NULL) {
 289        parse_error(ctxt, token, "Missing value in dict");
 290        goto out;
 291    }
 292
 293    if (qdict_haskey(dict, qstring_get_str(key))) {
 294        parse_error(ctxt, token, "duplicate key");
 295        goto out;
 296    }
 297
 298    qdict_put_obj(dict, qstring_get_str(key), value);
 299
 300    qobject_unref(key);
 301
 302    return 0;
 303
 304out:
 305    qobject_unref(key);
 306
 307    return -1;
 308}
 309
 310static QObject *parse_object(JSONParserContext *ctxt)
 311{
 312    QDict *dict = NULL;
 313    JSONToken *token, *peek;
 314
 315    token = parser_context_pop_token(ctxt);
 316    assert(token && token->type == JSON_LCURLY);
 317
 318    dict = qdict_new();
 319
 320    peek = parser_context_peek_token(ctxt);
 321    if (peek == NULL) {
 322        parse_error(ctxt, NULL, "premature EOI");
 323        goto out;
 324    }
 325
 326    if (peek->type != JSON_RCURLY) {
 327        if (parse_pair(ctxt, dict) == -1) {
 328            goto out;
 329        }
 330
 331        token = parser_context_pop_token(ctxt);
 332        if (token == NULL) {
 333            parse_error(ctxt, NULL, "premature EOI");
 334            goto out;
 335        }
 336
 337        while (token->type != JSON_RCURLY) {
 338            if (token->type != JSON_COMMA) {
 339                parse_error(ctxt, token, "expected separator in dict");
 340                goto out;
 341            }
 342
 343            if (parse_pair(ctxt, dict) == -1) {
 344                goto out;
 345            }
 346
 347            token = parser_context_pop_token(ctxt);
 348            if (token == NULL) {
 349                parse_error(ctxt, NULL, "premature EOI");
 350                goto out;
 351            }
 352        }
 353    } else {
 354        (void)parser_context_pop_token(ctxt);
 355    }
 356
 357    return QOBJECT(dict);
 358
 359out:
 360    qobject_unref(dict);
 361    return NULL;
 362}
 363
 364static QObject *parse_array(JSONParserContext *ctxt)
 365{
 366    QList *list = NULL;
 367    JSONToken *token, *peek;
 368
 369    token = parser_context_pop_token(ctxt);
 370    assert(token && token->type == JSON_LSQUARE);
 371
 372    list = qlist_new();
 373
 374    peek = parser_context_peek_token(ctxt);
 375    if (peek == NULL) {
 376        parse_error(ctxt, NULL, "premature EOI");
 377        goto out;
 378    }
 379
 380    if (peek->type != JSON_RSQUARE) {
 381        QObject *obj;
 382
 383        obj = parse_value(ctxt);
 384        if (obj == NULL) {
 385            parse_error(ctxt, token, "expecting value");
 386            goto out;
 387        }
 388
 389        qlist_append_obj(list, obj);
 390
 391        token = parser_context_pop_token(ctxt);
 392        if (token == NULL) {
 393            parse_error(ctxt, NULL, "premature EOI");
 394            goto out;
 395        }
 396
 397        while (token->type != JSON_RSQUARE) {
 398            if (token->type != JSON_COMMA) {
 399                parse_error(ctxt, token, "expected separator in list");
 400                goto out;
 401            }
 402
 403            obj = parse_value(ctxt);
 404            if (obj == NULL) {
 405                parse_error(ctxt, token, "expecting value");
 406                goto out;
 407            }
 408
 409            qlist_append_obj(list, obj);
 410
 411            token = parser_context_pop_token(ctxt);
 412            if (token == NULL) {
 413                parse_error(ctxt, NULL, "premature EOI");
 414                goto out;
 415            }
 416        }
 417    } else {
 418        (void)parser_context_pop_token(ctxt);
 419    }
 420
 421    return QOBJECT(list);
 422
 423out:
 424    qobject_unref(list);
 425    return NULL;
 426}
 427
 428static QObject *parse_keyword(JSONParserContext *ctxt)
 429{
 430    JSONToken *token;
 431
 432    token = parser_context_pop_token(ctxt);
 433    assert(token && token->type == JSON_KEYWORD);
 434
 435    if (!strcmp(token->str, "true")) {
 436        return QOBJECT(qbool_from_bool(true));
 437    } else if (!strcmp(token->str, "false")) {
 438        return QOBJECT(qbool_from_bool(false));
 439    } else if (!strcmp(token->str, "null")) {
 440        return QOBJECT(qnull());
 441    }
 442    parse_error(ctxt, token, "invalid keyword '%s'", token->str);
 443    return NULL;
 444}
 445
 446static QObject *parse_interpolation(JSONParserContext *ctxt)
 447{
 448    JSONToken *token;
 449
 450    token = parser_context_pop_token(ctxt);
 451    assert(token && token->type == JSON_INTERP);
 452
 453    if (!strcmp(token->str, "%p")) {
 454        return va_arg(*ctxt->ap, QObject *);
 455    } else if (!strcmp(token->str, "%i")) {
 456        return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
 457    } else if (!strcmp(token->str, "%d")) {
 458        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
 459    } else if (!strcmp(token->str, "%ld")) {
 460        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
 461    } else if (!strcmp(token->str, "%lld")) {
 462        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
 463    } else if (!strcmp(token->str, "%" PRId64)) {
 464        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
 465    } else if (!strcmp(token->str, "%u")) {
 466        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
 467    } else if (!strcmp(token->str, "%lu")) {
 468        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
 469    } else if (!strcmp(token->str, "%llu")) {
 470        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
 471    } else if (!strcmp(token->str, "%" PRIu64)) {
 472        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
 473    } else if (!strcmp(token->str, "%s")) {
 474        return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
 475    } else if (!strcmp(token->str, "%f")) {
 476        return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
 477    }
 478    parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
 479    return NULL;
 480}
 481
 482static QObject *parse_literal(JSONParserContext *ctxt)
 483{
 484    JSONToken *token;
 485
 486    token = parser_context_pop_token(ctxt);
 487    assert(token);
 488
 489    switch (token->type) {
 490    case JSON_STRING:
 491        return QOBJECT(parse_string(ctxt, token));
 492    case JSON_INTEGER: {
 493        /*
 494         * Represent JSON_INTEGER as QNUM_I64 if possible, else as
 495         * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
 496         * and qemu_strtou64() fail with ERANGE when it's not
 497         * possible.
 498         *
 499         * qnum_get_int() will then work for any signed 64-bit
 500         * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
 501         * integer, and qnum_get_double() both for any JSON_INTEGER
 502         * and any JSON_FLOAT (with precision loss for integers beyond
 503         * 53 bits)
 504         */
 505        int ret;
 506        int64_t value;
 507        uint64_t uvalue;
 508
 509        ret = qemu_strtoi64(token->str, NULL, 10, &value);
 510        if (!ret) {
 511            return QOBJECT(qnum_from_int(value));
 512        }
 513        assert(ret == -ERANGE);
 514
 515        if (token->str[0] != '-') {
 516            ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
 517            if (!ret) {
 518                return QOBJECT(qnum_from_uint(uvalue));
 519            }
 520            assert(ret == -ERANGE);
 521        }
 522        /* fall through to JSON_FLOAT */
 523    }
 524    case JSON_FLOAT:
 525        /* FIXME dependent on locale; a pervasive issue in QEMU */
 526        /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
 527         * but those might be useful extensions beyond JSON */
 528        return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
 529    default:
 530        abort();
 531    }
 532}
 533
 534static QObject *parse_value(JSONParserContext *ctxt)
 535{
 536    JSONToken *token;
 537
 538    token = parser_context_peek_token(ctxt);
 539    if (token == NULL) {
 540        parse_error(ctxt, NULL, "premature EOI");
 541        return NULL;
 542    }
 543
 544    switch (token->type) {
 545    case JSON_LCURLY:
 546        return parse_object(ctxt);
 547    case JSON_LSQUARE:
 548        return parse_array(ctxt);
 549    case JSON_INTERP:
 550        return parse_interpolation(ctxt);
 551    case JSON_INTEGER:
 552    case JSON_FLOAT:
 553    case JSON_STRING:
 554        return parse_literal(ctxt);
 555    case JSON_KEYWORD:
 556        return parse_keyword(ctxt);
 557    default:
 558        parse_error(ctxt, token, "expecting value");
 559        return NULL;
 560    }
 561}
 562
 563JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
 564{
 565    JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
 566
 567    token->type = type;
 568    memcpy(token->str, tokstr->str, tokstr->len);
 569    token->str[tokstr->len] = 0;
 570    token->x = x;
 571    token->y = y;
 572    return token;
 573}
 574
 575QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
 576{
 577    JSONParserContext ctxt = { .buf = tokens, .ap = ap };
 578    QObject *result;
 579
 580    result = parse_value(&ctxt);
 581    assert(ctxt.err || g_queue_is_empty(ctxt.buf));
 582
 583    error_propagate(errp, ctxt.err);
 584
 585    while (!g_queue_is_empty(ctxt.buf)) {
 586        parser_context_pop_token(&ctxt);
 587    }
 588    g_free(ctxt.current);
 589
 590    return result;
 591}
 592