qemu/qobject/json-parser.c
<<
>>
Prefs
/*
 * JSON Parser
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
  13
  14#include "qemu/osdep.h"
  15#include "qemu/cutils.h"
  16#include "qemu/unicode.h"
  17#include "qapi/error.h"
  18#include "qemu-common.h"
  19#include "qapi/qmp/qbool.h"
  20#include "qapi/qmp/qdict.h"
  21#include "qapi/qmp/qlist.h"
  22#include "qapi/qmp/qnull.h"
  23#include "qapi/qmp/qnum.h"
  24#include "qapi/qmp/qstring.h"
  25#include "json-parser-int.h"
  26
  27struct JSONToken {
  28    JSONTokenType type;
  29    int x;
  30    int y;
  31    char str[];
  32};
  33
  34typedef struct JSONParserContext
  35{
  36    Error *err;
  37    JSONToken *current;
  38    GQueue *buf;
  39    va_list *ap;
  40} JSONParserContext;
  41
  42#define BUG_ON(cond) assert(!(cond))
  43
  44/**
  45 * TODO
  46 *
  47 * 0) make errors meaningful again
  48 * 1) add geometry information to tokens
  49 * 3) should we return a parsed size?
  50 * 4) deal with premature EOI
  51 */
  52
  53static QObject *parse_value(JSONParserContext *ctxt);
  54
  55/**
  56 * Error handler
  57 */
  58static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
  59                                           JSONToken *token, const char *msg, ...)
  60{
  61    va_list ap;
  62    char message[1024];
  63
  64    if (ctxt->err) {
  65        return;
  66    }
  67    va_start(ap, msg);
  68    vsnprintf(message, sizeof(message), msg, ap);
  69    va_end(ap);
  70    error_setg(&ctxt->err, "JSON parse error, %s", message);
  71}
  72
  73static int cvt4hex(const char *s)
  74{
  75    int cp, i;
  76
  77    cp = 0;
  78    for (i = 0; i < 4; i++) {
  79        if (!qemu_isxdigit(s[i])) {
  80            return -1;
  81        }
  82        cp <<= 4;
  83        if (s[i] >= '0' && s[i] <= '9') {
  84            cp |= s[i] - '0';
  85        } else if (s[i] >= 'a' && s[i] <= 'f') {
  86            cp |= 10 + s[i] - 'a';
  87        } else if (s[i] >= 'A' && s[i] <= 'F') {
  88            cp |= 10 + s[i] - 'A';
  89        } else {
  90            return -1;
  91        }
  92    }
  93    return cp;
  94}
  95
  96/**
  97 * parse_string(): Parse a JSON string
  98 *
  99 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
 100 * Interchange Format":
 101 *
 102 *    char = unescaped /
 103 *        escape (
 104 *            %x22 /          ; "    quotation mark  U+0022
 105 *            %x5C /          ; \    reverse solidus U+005C
 106 *            %x2F /          ; /    solidus         U+002F
 107 *            %x62 /          ; b    backspace       U+0008
 108 *            %x66 /          ; f    form feed       U+000C
 109 *            %x6E /          ; n    line feed       U+000A
 110 *            %x72 /          ; r    carriage return U+000D
 111 *            %x74 /          ; t    tab             U+0009
 112 *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
 113 *    escape = %x5C              ; \
 114 *    quotation-mark = %x22      ; "
 115 *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
 116 *
 117 * Extensions over RFC 8259:
 118 * - Extra escape sequence in strings:
 119 *   0x27 (apostrophe) is recognized after escape, too
 120 * - Single-quoted strings:
 121 *   Like double-quoted strings, except they're delimited by %x27
 122 *   (apostrophe) instead of %x22 (quotation mark), and can't contain
 123 *   unescaped apostrophe, but can contain unescaped quotation mark.
 124 *
 125 * Note:
 126 * - Encoding is modified UTF-8.
 127 * - Invalid Unicode characters are rejected.
 128 * - Control characters \x00..\x1F are rejected by the lexer.
 129 */
 130static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 131{
 132    const char *ptr = token->str;
 133    QString *str;
 134    char quote;
 135    const char *beg;
 136    int cp, trailing;
 137    char *end;
 138    ssize_t len;
 139    char utf8_buf[5];
 140
 141    assert(*ptr == '"' || *ptr == '\'');
 142    quote = *ptr++;
 143    str = qstring_new();
 144
 145    while (*ptr != quote) {
 146        assert(*ptr);
 147        switch (*ptr) {
 148        case '\\':
 149            beg = ptr++;
 150            switch (*ptr++) {
 151            case '"':
 152                qstring_append_chr(str, '"');
 153                break;
 154            case '\'':
 155                qstring_append_chr(str, '\'');
 156                break;
 157            case '\\':
 158                qstring_append_chr(str, '\\');
 159                break;
 160            case '/':
 161                qstring_append_chr(str, '/');
 162                break;
 163            case 'b':
 164                qstring_append_chr(str, '\b');
 165                break;
 166            case 'f':
 167                qstring_append_chr(str, '\f');
 168                break;
 169            case 'n':
 170                qstring_append_chr(str, '\n');
 171                break;
 172            case 'r':
 173                qstring_append_chr(str, '\r');
 174                break;
 175            case 't':
 176                qstring_append_chr(str, '\t');
 177                break;
 178            case 'u':
 179                cp = cvt4hex(ptr);
 180                ptr += 4;
 181
 182                /* handle surrogate pairs */
 183                if (cp >= 0xD800 && cp <= 0xDBFF
 184                    && ptr[0] == '\\' && ptr[1] == 'u') {
 185                    /* leading surrogate followed by \u */
 186                    cp = 0x10000 + ((cp & 0x3FF) << 10);
 187                    trailing = cvt4hex(ptr + 2);
 188                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
 189                        /* followed by trailing surrogate */
 190                        cp |= trailing & 0x3FF;
 191                        ptr += 6;
 192                    } else {
 193                        cp = -1; /* invalid */
 194                    }
 195                }
 196
 197                if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
 198                    parse_error(ctxt, token,
 199                                "%.*s is not a valid Unicode character",
 200                                (int)(ptr - beg), beg);
 201                    goto out;
 202                }
 203                qstring_append(str, utf8_buf);
 204                break;
 205            default:
 206                parse_error(ctxt, token, "invalid escape sequence in string");
 207                goto out;
 208            }
 209            break;
 210        case '%':
 211            if (ctxt->ap && ptr[1] != '%') {
 212                parse_error(ctxt, token, "can't interpolate into string");
 213                goto out;
 214            }
 215            ptr++;
 216            /* fall through */
 217        default:
 218            cp = mod_utf8_codepoint(ptr, 6, &end);
 219            if (cp < 0) {
 220                parse_error(ctxt, token, "invalid UTF-8 sequence in string");
 221                goto out;
 222            }
 223            ptr = end;
 224            len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
 225            assert(len >= 0);
 226            qstring_append(str, utf8_buf);
 227        }
 228    }
 229
 230    return str;
 231
 232out:
 233    qobject_unref(str);
 234    return NULL;
 235}
 236
 237/* Note: the token object returned by parser_context_peek_token or
 238 * parser_context_pop_token is deleted as soon as parser_context_pop_token
 239 * is called again.
 240 */
 241static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
 242{
 243    g_free(ctxt->current);
 244    ctxt->current = g_queue_pop_head(ctxt->buf);
 245    return ctxt->current;
 246}
 247
 248static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
 249{
 250    return g_queue_peek_head(ctxt->buf);
 251}
 252
 253/**
 254 * Parsing rules
 255 */
 256static int parse_pair(JSONParserContext *ctxt, QDict *dict)
 257{
 258    QObject *value;
 259    QString *key = NULL;
 260    JSONToken *peek, *token;
 261
 262    peek = parser_context_peek_token(ctxt);
 263    if (peek == NULL) {
 264        parse_error(ctxt, NULL, "premature EOI");
 265        goto out;
 266    }
 267
 268    key = qobject_to(QString, parse_value(ctxt));
 269    if (!key) {
 270        parse_error(ctxt, peek, "key is not a string in object");
 271        goto out;
 272    }
 273
 274    token = parser_context_pop_token(ctxt);
 275    if (token == NULL) {
 276        parse_error(ctxt, NULL, "premature EOI");
 277        goto out;
 278    }
 279
 280    if (token->type != JSON_COLON) {
 281        parse_error(ctxt, token, "missing : in object pair");
 282        goto out;
 283    }
 284
 285    value = parse_value(ctxt);
 286    if (value == NULL) {
 287        parse_error(ctxt, token, "Missing value in dict");
 288        goto out;
 289    }
 290
 291    qdict_put_obj(dict, qstring_get_str(key), value);
 292
 293    qobject_unref(key);
 294
 295    return 0;
 296
 297out:
 298    qobject_unref(key);
 299
 300    return -1;
 301}
 302
 303static QObject *parse_object(JSONParserContext *ctxt)
 304{
 305    QDict *dict = NULL;
 306    JSONToken *token, *peek;
 307
 308    token = parser_context_pop_token(ctxt);
 309    assert(token && token->type == JSON_LCURLY);
 310
 311    dict = qdict_new();
 312
 313    peek = parser_context_peek_token(ctxt);
 314    if (peek == NULL) {
 315        parse_error(ctxt, NULL, "premature EOI");
 316        goto out;
 317    }
 318
 319    if (peek->type != JSON_RCURLY) {
 320        if (parse_pair(ctxt, dict) == -1) {
 321            goto out;
 322        }
 323
 324        token = parser_context_pop_token(ctxt);
 325        if (token == NULL) {
 326            parse_error(ctxt, NULL, "premature EOI");
 327            goto out;
 328        }
 329
 330        while (token->type != JSON_RCURLY) {
 331            if (token->type != JSON_COMMA) {
 332                parse_error(ctxt, token, "expected separator in dict");
 333                goto out;
 334            }
 335
 336            if (parse_pair(ctxt, dict) == -1) {
 337                goto out;
 338            }
 339
 340            token = parser_context_pop_token(ctxt);
 341            if (token == NULL) {
 342                parse_error(ctxt, NULL, "premature EOI");
 343                goto out;
 344            }
 345        }
 346    } else {
 347        (void)parser_context_pop_token(ctxt);
 348    }
 349
 350    return QOBJECT(dict);
 351
 352out:
 353    qobject_unref(dict);
 354    return NULL;
 355}
 356
 357static QObject *parse_array(JSONParserContext *ctxt)
 358{
 359    QList *list = NULL;
 360    JSONToken *token, *peek;
 361
 362    token = parser_context_pop_token(ctxt);
 363    assert(token && token->type == JSON_LSQUARE);
 364
 365    list = qlist_new();
 366
 367    peek = parser_context_peek_token(ctxt);
 368    if (peek == NULL) {
 369        parse_error(ctxt, NULL, "premature EOI");
 370        goto out;
 371    }
 372
 373    if (peek->type != JSON_RSQUARE) {
 374        QObject *obj;
 375
 376        obj = parse_value(ctxt);
 377        if (obj == NULL) {
 378            parse_error(ctxt, token, "expecting value");
 379            goto out;
 380        }
 381
 382        qlist_append_obj(list, obj);
 383
 384        token = parser_context_pop_token(ctxt);
 385        if (token == NULL) {
 386            parse_error(ctxt, NULL, "premature EOI");
 387            goto out;
 388        }
 389
 390        while (token->type != JSON_RSQUARE) {
 391            if (token->type != JSON_COMMA) {
 392                parse_error(ctxt, token, "expected separator in list");
 393                goto out;
 394            }
 395
 396            obj = parse_value(ctxt);
 397            if (obj == NULL) {
 398                parse_error(ctxt, token, "expecting value");
 399                goto out;
 400            }
 401
 402            qlist_append_obj(list, obj);
 403
 404            token = parser_context_pop_token(ctxt);
 405            if (token == NULL) {
 406                parse_error(ctxt, NULL, "premature EOI");
 407                goto out;
 408            }
 409        }
 410    } else {
 411        (void)parser_context_pop_token(ctxt);
 412    }
 413
 414    return QOBJECT(list);
 415
 416out:
 417    qobject_unref(list);
 418    return NULL;
 419}
 420
 421static QObject *parse_keyword(JSONParserContext *ctxt)
 422{
 423    JSONToken *token;
 424
 425    token = parser_context_pop_token(ctxt);
 426    assert(token && token->type == JSON_KEYWORD);
 427
 428    if (!strcmp(token->str, "true")) {
 429        return QOBJECT(qbool_from_bool(true));
 430    } else if (!strcmp(token->str, "false")) {
 431        return QOBJECT(qbool_from_bool(false));
 432    } else if (!strcmp(token->str, "null")) {
 433        return QOBJECT(qnull());
 434    }
 435    parse_error(ctxt, token, "invalid keyword '%s'", token->str);
 436    return NULL;
 437}
 438
 439static QObject *parse_interpolation(JSONParserContext *ctxt)
 440{
 441    JSONToken *token;
 442
 443    token = parser_context_pop_token(ctxt);
 444    assert(token && token->type == JSON_INTERP);
 445
 446    if (!strcmp(token->str, "%p")) {
 447        return va_arg(*ctxt->ap, QObject *);
 448    } else if (!strcmp(token->str, "%i")) {
 449        return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
 450    } else if (!strcmp(token->str, "%d")) {
 451        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
 452    } else if (!strcmp(token->str, "%ld")) {
 453        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
 454    } else if (!strcmp(token->str, "%lld")) {
 455        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
 456    } else if (!strcmp(token->str, "%" PRId64)) {
 457        return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
 458    } else if (!strcmp(token->str, "%u")) {
 459        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
 460    } else if (!strcmp(token->str, "%lu")) {
 461        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
 462    } else if (!strcmp(token->str, "%llu")) {
 463        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
 464    } else if (!strcmp(token->str, "%" PRIu64)) {
 465        return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
 466    } else if (!strcmp(token->str, "%s")) {
 467        return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
 468    } else if (!strcmp(token->str, "%f")) {
 469        return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
 470    }
 471    parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
 472    return NULL;
 473}
 474
 475static QObject *parse_literal(JSONParserContext *ctxt)
 476{
 477    JSONToken *token;
 478
 479    token = parser_context_pop_token(ctxt);
 480    assert(token);
 481
 482    switch (token->type) {
 483    case JSON_STRING:
 484        return QOBJECT(parse_string(ctxt, token));
 485    case JSON_INTEGER: {
 486        /*
 487         * Represent JSON_INTEGER as QNUM_I64 if possible, else as
 488         * QNUM_U64, else as QNUM_DOUBLE.  Note that qemu_strtoi64()
 489         * and qemu_strtou64() fail with ERANGE when it's not
 490         * possible.
 491         *
 492         * qnum_get_int() will then work for any signed 64-bit
 493         * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
 494         * integer, and qnum_get_double() both for any JSON_INTEGER
 495         * and any JSON_FLOAT (with precision loss for integers beyond
 496         * 53 bits)
 497         */
 498        int ret;
 499        int64_t value;
 500        uint64_t uvalue;
 501
 502        ret = qemu_strtoi64(token->str, NULL, 10, &value);
 503        if (!ret) {
 504            return QOBJECT(qnum_from_int(value));
 505        }
 506        assert(ret == -ERANGE);
 507
 508        if (token->str[0] != '-') {
 509            ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
 510            if (!ret) {
 511                return QOBJECT(qnum_from_uint(uvalue));
 512            }
 513            assert(ret == -ERANGE);
 514        }
 515        /* fall through to JSON_FLOAT */
 516    }
 517    case JSON_FLOAT:
 518        /* FIXME dependent on locale; a pervasive issue in QEMU */
 519        /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
 520         * but those might be useful extensions beyond JSON */
 521        return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
 522    default:
 523        abort();
 524    }
 525}
 526
 527static QObject *parse_value(JSONParserContext *ctxt)
 528{
 529    JSONToken *token;
 530
 531    token = parser_context_peek_token(ctxt);
 532    if (token == NULL) {
 533        parse_error(ctxt, NULL, "premature EOI");
 534        return NULL;
 535    }
 536
 537    switch (token->type) {
 538    case JSON_LCURLY:
 539        return parse_object(ctxt);
 540    case JSON_LSQUARE:
 541        return parse_array(ctxt);
 542    case JSON_INTERP:
 543        return parse_interpolation(ctxt);
 544    case JSON_INTEGER:
 545    case JSON_FLOAT:
 546    case JSON_STRING:
 547        return parse_literal(ctxt);
 548    case JSON_KEYWORD:
 549        return parse_keyword(ctxt);
 550    default:
 551        parse_error(ctxt, token, "expecting value");
 552        return NULL;
 553    }
 554}
 555
 556JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
 557{
 558    JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
 559
 560    token->type = type;
 561    memcpy(token->str, tokstr->str, tokstr->len);
 562    token->str[tokstr->len] = 0;
 563    token->x = x;
 564    token->y = y;
 565    return token;
 566}
 567
 568QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
 569{
 570    JSONParserContext ctxt = { .buf = tokens, .ap = ap };
 571    QObject *result;
 572
 573    result = parse_value(&ctxt);
 574    assert(ctxt.err || g_queue_is_empty(ctxt.buf));
 575
 576    error_propagate(errp, ctxt.err);
 577
 578    while (!g_queue_is_empty(ctxt.buf)) {
 579        parser_context_pop_token(&ctxt);
 580    }
 581    g_free(ctxt.current);
 582
 583    return result;
 584}
 585