qemu/qobject/json-parser.c
<<
>>
Prefs
   /*
    * JSON Parser
    *
    * Copyright IBM, Corp. 2009
    *
    * Authors:
    *  Anthony Liguori   <aliguori@us.ibm.com>
    *
    * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
    * See the COPYING.LIB file in the top-level directory.
    *
    */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/cutils.h"
  16#include "qemu/unicode.h"
  17#include "qapi/error.h"
  18#include "qemu-common.h"
  19#include "qapi/qmp/qbool.h"
  20#include "qapi/qmp/qdict.h"
  21#include "qapi/qmp/qlist.h"
  22#include "qapi/qmp/qnull.h"
  23#include "qapi/qmp/qnum.h"
  24#include "qapi/qmp/qstring.h"
  25#include "json-parser-int.h"
  26
  27struct JSONToken {
  28    JSONTokenType type;
  29    int x;
  30    int y;
  31    char str[];
  32};
  33
  34typedef struct JSONParserContext
  35{
  36    Error *err;
  37    JSONToken *current;
  38    GQueue *buf;
  39    va_list *ap;
  40} JSONParserContext;
  41
  /* Assert that @cond does NOT hold. */
  #define BUG_ON(cond) assert(!(cond))
  43
  /**
   * TODO
   *
   * 0) make errors meaningful again
   * 1) add geometry information to tokens
   * 3) should we return a parsed size?
   * 4) deal with premature EOI
   */
  52
  53static QObject *parse_value(JSONParserContext *ctxt);
  54
  55/**
  56 * Error handler
  57 */
  58static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
  59                                           JSONToken *token, const char *msg, ...)
  60{
  61    va_list ap;
  62    char message[1024];
  63
  64    if (ctxt->err) {
  65        return;
  66    }
  67    va_start(ap, msg);
  68    vsnprintf(message, sizeof(message), msg, ap);
  69    va_end(ap);
  70    error_setg(&ctxt->err, "JSON parse error, %s", message);
  71}
  72
  /**
   * cvt4hex(): Convert four hexadecimal digits to an integer
   *
   * @s: text to convert; examined one character at a time
   *
   * Returns the value encoded by the four hex digits at @s (0..0xFFFF),
   * or -1 if one of them is not in [0-9a-fA-F].  Scanning stops at the
   * first offending character, so a string that ends early is never read
   * beyond its terminating NUL.
   *
   * Each character is classified exactly once by the range checks below;
   * a separate "is it a hex digit" pre-check would only duplicate them.
   */
  static int cvt4hex(const char *s)
  {
      int cp = 0;
      int i;

      for (i = 0; i < 4; i++) {
          char c = s[i];
          int digit;

          if (c >= '0' && c <= '9') {
              digit = c - '0';
          } else if (c >= 'a' && c <= 'f') {
              digit = 10 + c - 'a';
          } else if (c >= 'A' && c <= 'F') {
              digit = 10 + c - 'A';
          } else {
              return -1;
          }
          cp = (cp << 4) | digit;
      }
      return cp;
  }
  95
  96/**
  97 * parse_string(): Parse a JSON string
  98 *
  99 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
 100 * Interchange Format":
 101 *
 102 *    char = unescaped /
 103 *        escape (
 104 *            %x22 /          ; "    quotation mark  U+0022
 105 *            %x5C /          ; \    reverse solidus U+005C
 106 *            %x2F /          ; /    solidus         U+002F
 107 *            %x62 /          ; b    backspace       U+0008
 108 *            %x66 /          ; f    form feed       U+000C
 109 *            %x6E /          ; n    line feed       U+000A
 110 *            %x72 /          ; r    carriage return U+000D
 111 *            %x74 /          ; t    tab             U+0009
 112 *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
 113 *    escape = %x5C              ; \
 114 *    quotation-mark = %x22      ; "
 115 *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
 116 *
 117 * Extensions over RFC 8259:
 118 * - Extra escape sequence in strings:
 119 *   0x27 (apostrophe) is recognized after escape, too
 120 * - Single-quoted strings:
 121 *   Like double-quoted strings, except they're delimited by %x27
 122 *   (apostrophe) instead of %x22 (quotation mark), and can't contain
 123 *   unescaped apostrophe, but can contain unescaped quotation mark.
 124 *
 125 * Note:
 126 * - Encoding is modified UTF-8.
 127 * - Invalid Unicode characters are rejected.
 128 * - Control characters \x00..\x1F are rejected by the lexer.
 129 */
 130static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 131{
 132    const char *ptr = token->str;
 133    QString *str;
 134    char quote;
 135    const char *beg;
 136    int cp, trailing;
 137    char *end;
 138    ssize_t len;
 139    char utf8_buf[5];
 140
 141    assert(*ptr == '"' || *ptr == '\'');
 142    quote = *ptr++;
 143    str = qstring_new();
 144
 145    while (*ptr != quote) {
 146        assert(*ptr);
 147        switch (*ptr) {
 148        case '\\':
 149            beg = ptr++;
 150            switch (*ptr++) {
 151            case '"':
 152                qstring_append_chr(str, '"');
 153                break;
 154            case '\'':
 155                qstring_append_chr(str, '\'');
 156                break;
 157            case '\\':
 158                qstring_append_chr(str, '\\');
 159                break;
 160            case '/':
 161                qstring_append_chr(str, '/');
 162                break;
 163            case 'b':
 164                qstring_append_chr(str, '\b');
 165                break;
 166            case 'f':
 167                qstring_append_chr(str, '\f');
 168                break;
 169            case 'n':
 170                qstring_append_chr(str, '\n');
 171                break;
 172            case 'r':
 173                qstring_append_chr(str, '\r');
 174                break;
 175            case 't':
 176                qstring_append_chr(str, '\t');
 177                break;
 178            case 'u':
 179                cp = cvt4hex(ptr);
 180                ptr += 4;
 181
 182                /* handle surrogate pairs */
 183                if (cp >= 0xD800 && cp <= 0xDBFF
 184                    && ptr[0] == '\\' && ptr[1] == 'u') {
 185                    /* leading surrogate followed by \u */
 186                    cp = 0x10000 + ((cp & 0x3FF) << 10);
 187                    trailing = cvt4hex(ptr + 2);
 188                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
 189                        /* followed by trailing surrogate */
 190                        cp |= trailing & 0x3FF;
 191                        ptr += 6;
 192                    } else {
 193                        cp = -1; /* invalid */
 194                    }
 195                }
 196
 197                if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
 198                    parse_error(ctxt, token,
 199                                "%.*s is not a valid Unicode character",
 200                                (int)(ptr - beg), beg);
 201                    goto out;
 202                }
 203                qstring_append(str, utf8_buf);
 204                break;
 205            default:
 206                parse_error(ctxt, token, "invalid escape sequence in string");
 207                goto out;
 208            }
 209            break;
 210        case '%':
 211            if (ctxt->ap) {
 212                if (ptr[1] != '%') {
 213                    parse_error(ctxt, token, "can't interpolate into string");
 214                    goto out;
 215                }
 216                ptr++;
 217            }
 218            /* fall through */
 219        default:
 220            cp = mod_utf8_codepoint(ptr, 6, &end);
 221            if (cp < 0) {
 222                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
 223                goto out;
 224            }
 225            ptr = end;
 226            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
 227            assert(len >= 0);
 228            qstring_append(str, utf8_buf);
 229        }
 230    }
 231
 232    return str;
 233
 234out:
 235    qobject_unref(str);
 236    return NULL;
 237}
 238
 239/* Note: the token object returned by parser_context_peek_token or
 240 * parser_context_pop_token is deleted as soon as parser_context_pop_token
 241 * is called again.
 242 */
 243static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
 244{
 245    g_free(ctxt->current);
 246    ctxt->current = g_queue_pop_head(ctxt->buf);
 247    return ctxt->current;
 248}
 249
 250static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
 251{
 252    return g_queue_peek_head(ctxt->buf);
 253}
 254
 255/**
 256 * Parsing rules
 257 */
 258static int parse_pair(JSONParserContext *ctxt, QDict *dict)
 259{
 260    QObject *value;
 261    QString *key = NULL;
 262    JSONToken *peek, *token;
 263
 264    peek = parser_context_peek_token(ctxt);
 265    if (peek == NULL) {
 266        parse_error(ctxt, NULL, "premature EOI");
 267        goto out;
 268    }
 269
 270    key = qobject_to(QString, parse_value(ctxt));
 271    if (!key) {
 272        parse_error(ctxt, peek, "key is not a string in object");
 273        goto out;
 274    }
 275
 276    token = parser_context_pop_token(ctxt);
 277    if (token == NULL) {
 278        parse_error(ctxt, NULL, "premature EOI");
 279        goto out;
 280    }
 281
 282    if (token->type != JSON_COLON) {
 283        parse_error(ctxt, token, "missing : in object pair");
 284        goto out;
 285    }
 286
 287    value = parse_value(ctxt);
 288    if (value == NULL) {
 289        parse_error(ctxt, token, "Missing value in dict");
 290        goto out;
 291    }
 292
 293    qdict_put_obj(dict, qstring_get_str(key), value);
 294
 295    qobject_unref(key);
 296
 297    return 0;
 298
 299out:
 300    qobject_unref(key);
 301
 302    return -1;
 303}
 304
 305static QObject *parse_object(JSONParserContext *ctxt)
 306{
 307    QDict *dict = NULL;
 308    JSONToken *token, *peek;
 309
 310    token = parser_context_pop_token(ctxt);
 311    assert(token && token->type == JSON_LCURLY);
 312
 313    dict = qdict_new();
 314
 315    peek = parser_context_peek_token(ctxt);
 316    if (peek == NULL) {
 317        parse_error(ctxt, NULL, "premature EOI");
 318        goto out;
 319    }
 320
 321    if (peek->type != JSON_RCURLY) {
 322        if (parse_pair(ctxt, dict) == -1) {
 323            goto out;
 324        }
 325
 326        token = parser_context_pop_token(ctxt);
 327        if (token == NULL) {
 328            parse_error(ctxt, NULL, "premature EOI");
 329            goto out;
 330        }
 331
 332        while (token->type != JSON_RCURLY) {
 333            if (token->type != JSON_COMMA) {
 334                parse_error(ctxt, token, "expected separator in dict");
 335                goto out;
 336            }
 337
 338            if (parse_pair(ctxt, dict) == -1) {
 339                goto out;
 340            }
 341
 342            token = parser_context_pop_token(ctxt);
 343            if (token == NULL) {
 344                parse_error(ctxt, NULL, "premature EOI");
 345                goto out;
 346            }
 347        }
 348    } else {
 349        (void)parser_context_pop_token(ctxt);
 350    }
 351
 352    return QOBJECT(dict);
 353
 354out:
 355    qobject_unref(dict);
 356    return NULL;
 357}
 358
 359static QObject *parse_array(JSONParserContext *ctxt)
 360{
 361    QList *list = NULL;
 362    JSONToken *token, *peek;
 363
 364    token = parser_context_pop_token(ctxt);
 365    assert(token && token->type == JSON_LSQUARE);
 366
 367    list = qlist_new();
 368
 369    peek = parser_context_peek_token(ctxt);
 370    if (peek == NULL) {
 371        parse_error(ctxt, NULL, "premature EOI");
 372        goto out;
 373    }
 374
 375    if (peek->type != JSON_RSQUARE) {
 376        QObject *obj;
 377
 378        obj = parse_value(ctxt);
 379        if (obj == NULL) {
 380            parse_error(ctxt, token, "expecting value");
 381            goto out;
 382        }
 383
 384        qlist_append_obj(list, obj);
 385
 386        token = parser_context_pop_token(ctxt);
 387        if (token == NULL) {
 388            parse_error(ctxt, NULL, "premature EOI");
 389            goto out;
 390        }
 391
 392        while (token->type != JSON_RSQUARE) {
 393            if (token->type != JSON_COMMA) {
 394                parse_error(ctxt, token, "expected separator in list");
 395                goto out;
 396            }
 397
 398            obj = parse_value(ctxt);
 399            if (obj == NULL) {
 400                parse_error(ctxt, token, "expecting value");
 401                goto out;
 402            }
 403
 404            qlist_append_obj(list, obj);
 405
 406            token = parser_context_pop_token(ctxt);
 407            if (token == NULL) {
 408                parse_error(ctxt, NULL, "premature EOI");
 409                goto out;
 410            }
 411        }
 412    } else {
 413        (void)parser_context_pop_token(ctxt);
 414    }
 415
 416    return QOBJECT(list);
 417
 418out:
 419    qobject_unref(list);
 420    return NULL;
 421}
 422
 423static QObject *parse_keyword(JSONParserContext *ctxt)
 424{
 425    JSONToken *token;
 426
 427    token = parser_context_pop_token(ctxt);
 428    assert(token && token->type == JSON_KEYWORD);
 429
 430    if (!strcmp(token->str, "true")) {
 431        return QOBJECT(qbool_from_bool(true));
 432    } else if (!strcmp(token->str, "false")) {
 433        return QOBJECT(qbool_from_bool(false));
 434    } else if (!strcmp(token->str, "null")) {
 435        return QOBJECT(qnull());
 436    }
 437    parse_error(ctxt, token, "invalid keyword '%s'", token->str);
 438    return NULL;
 439}
 440
 441static QObject *parse_interpolation(JSONParserContext *ctxt)
 442{
 443    JSONToken *token;
 444
 445    token = parser_context_pop_token(ctxt);
 446    assert(token && token->type == JSON_INTERP);
 447
 448    if (!strcmp(token->str, "%p")) {
 449        return va_arg(*ctxt->ap, QObject *);
 450    } else if (!strcmp(token->str, "%i")) {
 451        return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
 452    } else if (!strcmp(token->str, "%d")) {
 453        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
 454    } else if (!strcmp(token->str, "%ld")) {
 455        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
 456    } else if (!strcmp(token->str, "%lld")) {
 457        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
 458    } else if (!strcmp(token->str, "%" PRId64)) {
 459        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
 460    } else if (!strcmp(token->str, "%u")) {
 461        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
 462    } else if (!strcmp(token->str, "%lu")) {
 463        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
 464    } else if (!strcmp(token->str, "%llu")) {
 465        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
 466    } else if (!strcmp(token->str, "%" PRIu64)) {
 467        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
 468    } else if (!strcmp(token->str, "%s")) {
 469        return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
 470    } else if (!strcmp(token->str, "%f")) {
 471        return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
 472    }
 473    parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
 474    return NULL;
 475}
 476
 477static QObject *parse_literal(JSONParserContext *ctxt)
 478{
 479    JSONToken *token;
 480
 481    token = parser_context_pop_token(ctxt);
 482    assert(token);
 483
 484    switch (token->type) {
 485    case JSON_STRING:
 486        return QOBJECT(parse_string(ctxt, token));
 487    case JSON_INTEGER: {
 488        /*
 489         * Represent JSON_INTEGER as QNUM_I64 if possible, else as
 490         * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
 491         * and qemu_strtou64() fail with ERANGE when it's not
 492         * possible.
 493         *
 494         * qnum_get_int() will then work for any signed 64-bit
 495         * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
 496         * integer, and qnum_get_double() both for any JSON_INTEGER
 497         * and any JSON_FLOAT (with precision loss for integers beyond
 498         * 53 bits)
 499         */
 500        int ret;
 501        int64_t value;
 502        uint64_t uvalue;
 503
 504        ret = qemu_strtoi64(token->str, NULL, 10, &value);
 505        if (!ret) {
 506            return QOBJECT(qnum_from_int(value));
 507        }
 508        assert(ret == -ERANGE);
 509
 510        if (token->str[0] != '-') {
 511            ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
 512            if (!ret) {
 513                return QOBJECT(qnum_from_uint(uvalue));
 514            }
 515            assert(ret == -ERANGE);
 516        }
 517        /* fall through to JSON_FLOAT */
 518    }
 519    case JSON_FLOAT:
 520        /* FIXME dependent on locale; a pervasive issue in QEMU */
 521        /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
 522         * but those might be useful extensions beyond JSON */
 523        return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
 524    default:
 525        abort();
 526    }
 527}
 528
 529static QObject *parse_value(JSONParserContext *ctxt)
 530{
 531    JSONToken *token;
 532
 533    token = parser_context_peek_token(ctxt);
 534    if (token == NULL) {
 535        parse_error(ctxt, NULL, "premature EOI");
 536        return NULL;
 537    }
 538
 539    switch (token->type) {
 540    case JSON_LCURLY:
 541        return parse_object(ctxt);
 542    case JSON_LSQUARE:
 543        return parse_array(ctxt);
 544    case JSON_INTERP:
 545        return parse_interpolation(ctxt);
 546    case JSON_INTEGER:
 547    case JSON_FLOAT:
 548    case JSON_STRING:
 549        return parse_literal(ctxt);
 550    case JSON_KEYWORD:
 551        return parse_keyword(ctxt);
 552    default:
 553        parse_error(ctxt, token, "expecting value");
 554        return NULL;
 555    }
 556}
 557
 558JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
 559{
 560    JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
 561
 562    token->type = type;
 563    memcpy(token->str, tokstr->str, tokstr->len);
 564    token->str[tokstr->len] = 0;
 565    token->x = x;
 566    token->y = y;
 567    return token;
 568}
 569
 570QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
 571{
 572    JSONParserContext ctxt = { .buf = tokens, .ap = ap };
 573    QObject *result;
 574
 575    result = parse_value(&ctxt);
 576    assert(ctxt.err || g_queue_is_empty(ctxt.buf));
 577
 578    error_propagate(errp, ctxt.err);
 579
 580    while (!g_queue_is_empty(ctxt.buf)) {
 581        parser_context_pop_token(&ctxt);
 582    }
 583    g_free(ctxt.current);
 584
 585    return result;
 586}
 587