qemu/qobject/json-parser.c
<<
>>
Prefs
   1/*
   2 * JSON Parser 
   3 *
   4 * Copyright IBM, Corp. 2009
   5 *
   6 * Authors:
   7 *  Anthony Liguori   <aliguori@us.ibm.com>
   8 *
   9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10 * See the COPYING.LIB file in the top-level directory.
  11 *
  12 */
  13
  14#include <stdarg.h>
  15
  16#include "qemu-common.h"
  17#include "qapi/qmp/qstring.h"
  18#include "qapi/qmp/qint.h"
  19#include "qapi/qmp/qdict.h"
  20#include "qapi/qmp/qlist.h"
  21#include "qapi/qmp/qfloat.h"
  22#include "qapi/qmp/qbool.h"
  23#include "qapi/qmp/json-parser.h"
  24#include "qapi/qmp/json-lexer.h"
  25#include "qapi/qmp/qerror.h"
  26
  27typedef struct JSONParserContext
  28{
  29    Error *err;
  30    struct {
  31        QObject **buf;
  32        size_t pos;
  33        size_t count;
  34    } tokens;
  35} JSONParserContext;
  36
  37#define BUG_ON(cond) assert(!(cond))
  38
  39/**
  40 * TODO
  41 *
  42 * 0) make errors meaningful again
  43 * 1) add geometry information to tokens
  44 * 3) should we return a parsed size?
  45 * 4) deal with premature EOI
  46 */
  47
/*
 * Forward declaration: the pair/object/array rules below call parse_value()
 * before its definition further down in this file.
 */
static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
  49
  50/**
  51 * Token manipulators
  52 *
  53 * tokens are dictionaries that contain a type, a string value, and geometry information
  54 * about a token identified by the lexer.  These are routines that make working with
  55 * these objects a bit easier.
  56 */
  57static const char *token_get_value(QObject *obj)
  58{
  59    return qdict_get_str(qobject_to_qdict(obj), "token");
  60}
  61
  62static JSONTokenType token_get_type(QObject *obj)
  63{
  64    return qdict_get_int(qobject_to_qdict(obj), "type");
  65}
  66
  67static int token_is_operator(QObject *obj, char op)
  68{
  69    const char *val;
  70
  71    if (token_get_type(obj) != JSON_OPERATOR) {
  72        return 0;
  73    }
  74
  75    val = token_get_value(obj);
  76
  77    return (val[0] == op) && (val[1] == 0);
  78}
  79
  80static int token_is_keyword(QObject *obj, const char *value)
  81{
  82    if (token_get_type(obj) != JSON_KEYWORD) {
  83        return 0;
  84    }
  85
  86    return strcmp(token_get_value(obj), value) == 0;
  87}
  88
  89static int token_is_escape(QObject *obj, const char *value)
  90{
  91    if (token_get_type(obj) != JSON_ESCAPE) {
  92        return 0;
  93    }
  94
  95    return (strcmp(token_get_value(obj), value) == 0);
  96}
  97
  98/**
  99 * Error handler
 100 */
 101static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
 102                                           QObject *token, const char *msg, ...)
 103{
 104    va_list ap;
 105    char message[1024];
 106    va_start(ap, msg);
 107    vsnprintf(message, sizeof(message), msg, ap);
 108    va_end(ap);
 109    if (ctxt->err) {
 110        error_free(ctxt->err);
 111        ctxt->err = NULL;
 112    }
 113    error_setg(&ctxt->err, "JSON parse error, %s", message);
 114}
 115
 116/**
 117 * String helpers
 118 *
 119 * These helpers are used to unescape strings.
 120 */
 121static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
 122{
 123    if (wchar <= 0x007F) {
 124        BUG_ON(buffer_length < 2);
 125
 126        buffer[0] = wchar & 0x7F;
 127        buffer[1] = 0;
 128    } else if (wchar <= 0x07FF) {
 129        BUG_ON(buffer_length < 3);
 130
 131        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
 132        buffer[1] = 0x80 | (wchar & 0x3F);
 133        buffer[2] = 0;
 134    } else {
 135        BUG_ON(buffer_length < 4);
 136
 137        buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
 138        buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
 139        buffer[2] = 0x80 | (wchar & 0x3F);
 140        buffer[3] = 0;
 141    }
 142}
 143
 144static int hex2decimal(char ch)
 145{
 146    if (ch >= '0' && ch <= '9') {
 147        return (ch - '0');
 148    } else if (ch >= 'a' && ch <= 'f') {
 149        return 10 + (ch - 'a');
 150    } else if (ch >= 'A' && ch <= 'F') {
 151        return 10 + (ch - 'A');
 152    }
 153
 154    return -1;
 155}
 156
 157/**
 158 * parse_string(): Parse a json string and return a QObject
 159 *
 160 *  string
 161 *      ""
 162 *      " chars "
 163 *  chars
 164 *      char
 165 *      char chars
 166 *  char
 167 *      any-Unicode-character-
 168 *          except-"-or-\-or-
 169 *          control-character
 170 *      \"
 171 *      \\
 172 *      \/
 173 *      \b
 174 *      \f
 175 *      \n
 176 *      \r
 177 *      \t
 178 *      \u four-hex-digits 
 179 */
 180static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
 181{
 182    const char *ptr = token_get_value(token);
 183    QString *str;
 184    int double_quote = 1;
 185
 186    if (*ptr == '"') {
 187        double_quote = 1;
 188    } else {
 189        double_quote = 0;
 190    }
 191    ptr++;
 192
 193    str = qstring_new();
 194    while (*ptr && 
 195           ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
 196        if (*ptr == '\\') {
 197            ptr++;
 198
 199            switch (*ptr) {
 200            case '"':
 201                qstring_append(str, "\"");
 202                ptr++;
 203                break;
 204            case '\'':
 205                qstring_append(str, "'");
 206                ptr++;
 207                break;
 208            case '\\':
 209                qstring_append(str, "\\");
 210                ptr++;
 211                break;
 212            case '/':
 213                qstring_append(str, "/");
 214                ptr++;
 215                break;
 216            case 'b':
 217                qstring_append(str, "\b");
 218                ptr++;
 219                break;
 220            case 'f':
 221                qstring_append(str, "\f");
 222                ptr++;
 223                break;
 224            case 'n':
 225                qstring_append(str, "\n");
 226                ptr++;
 227                break;
 228            case 'r':
 229                qstring_append(str, "\r");
 230                ptr++;
 231                break;
 232            case 't':
 233                qstring_append(str, "\t");
 234                ptr++;
 235                break;
 236            case 'u': {
 237                uint16_t unicode_char = 0;
 238                char utf8_char[4];
 239                int i = 0;
 240
 241                ptr++;
 242
 243                for (i = 0; i < 4; i++) {
 244                    if (qemu_isxdigit(*ptr)) {
 245                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
 246                    } else {
 247                        parse_error(ctxt, token,
 248                                    "invalid hex escape sequence in string");
 249                        goto out;
 250                    }
 251                    ptr++;
 252                }
 253
 254                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
 255                qstring_append(str, utf8_char);
 256            }   break;
 257            default:
 258                parse_error(ctxt, token, "invalid escape sequence in string");
 259                goto out;
 260            }
 261        } else {
 262            char dummy[2];
 263
 264            dummy[0] = *ptr++;
 265            dummy[1] = 0;
 266
 267            qstring_append(str, dummy);
 268        }
 269    }
 270
 271    return str;
 272
 273out:
 274    QDECREF(str);
 275    return NULL;
 276}
 277
 278static QObject *parser_context_pop_token(JSONParserContext *ctxt)
 279{
 280    QObject *token;
 281    g_assert(ctxt->tokens.pos < ctxt->tokens.count);
 282    token = ctxt->tokens.buf[ctxt->tokens.pos];
 283    ctxt->tokens.pos++;
 284    return token;
 285}
 286
 287/* Note: parser_context_{peek|pop}_token do not increment the
 288 * token object's refcount. In both cases the references will continue
 289 * to be tracked and cleaned up in parser_context_free(), so do not
 290 * attempt to free the token object.
 291 */
 292static QObject *parser_context_peek_token(JSONParserContext *ctxt)
 293{
 294    QObject *token;
 295    g_assert(ctxt->tokens.pos < ctxt->tokens.count);
 296    token = ctxt->tokens.buf[ctxt->tokens.pos];
 297    return token;
 298}
 299
 300static JSONParserContext parser_context_save(JSONParserContext *ctxt)
 301{
 302    JSONParserContext saved_ctxt = {0};
 303    saved_ctxt.tokens.pos = ctxt->tokens.pos;
 304    saved_ctxt.tokens.count = ctxt->tokens.count;
 305    saved_ctxt.tokens.buf = ctxt->tokens.buf;
 306    return saved_ctxt;
 307}
 308
 309static void parser_context_restore(JSONParserContext *ctxt,
 310                                   JSONParserContext saved_ctxt)
 311{
 312    ctxt->tokens.pos = saved_ctxt.tokens.pos;
 313    ctxt->tokens.count = saved_ctxt.tokens.count;
 314    ctxt->tokens.buf = saved_ctxt.tokens.buf;
 315}
 316
 317static void tokens_append_from_iter(QObject *obj, void *opaque)
 318{
 319    JSONParserContext *ctxt = opaque;
 320    g_assert(ctxt->tokens.pos < ctxt->tokens.count);
 321    ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
 322    qobject_incref(obj);
 323}
 324
 325static JSONParserContext *parser_context_new(QList *tokens)
 326{
 327    JSONParserContext *ctxt;
 328    size_t count;
 329
 330    if (!tokens) {
 331        return NULL;
 332    }
 333
 334    count = qlist_size(tokens);
 335    if (count == 0) {
 336        return NULL;
 337    }
 338
 339    ctxt = g_malloc0(sizeof(JSONParserContext));
 340    ctxt->tokens.pos = 0;
 341    ctxt->tokens.count = count;
 342    ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
 343    qlist_iter(tokens, tokens_append_from_iter, ctxt);
 344    ctxt->tokens.pos = 0;
 345
 346    return ctxt;
 347}
 348
 349/* to support error propagation, ctxt->err must be freed separately */
 350static void parser_context_free(JSONParserContext *ctxt)
 351{
 352    int i;
 353    if (ctxt) {
 354        for (i = 0; i < ctxt->tokens.count; i++) {
 355            qobject_decref(ctxt->tokens.buf[i]);
 356        }
 357        g_free(ctxt->tokens.buf);
 358        g_free(ctxt);
 359    }
 360}
 361
 362/**
 363 * Parsing rules
 364 */
 365static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
 366{
 367    QObject *key = NULL, *token = NULL, *value, *peek;
 368    JSONParserContext saved_ctxt = parser_context_save(ctxt);
 369
 370    peek = parser_context_peek_token(ctxt);
 371    if (peek == NULL) {
 372        parse_error(ctxt, NULL, "premature EOI");
 373        goto out;
 374    }
 375
 376    key = parse_value(ctxt, ap);
 377    if (!key || qobject_type(key) != QTYPE_QSTRING) {
 378        parse_error(ctxt, peek, "key is not a string in object");
 379        goto out;
 380    }
 381
 382    token = parser_context_pop_token(ctxt);
 383    if (token == NULL) {
 384        parse_error(ctxt, NULL, "premature EOI");
 385        goto out;
 386    }
 387
 388    if (!token_is_operator(token, ':')) {
 389        parse_error(ctxt, token, "missing : in object pair");
 390        goto out;
 391    }
 392
 393    value = parse_value(ctxt, ap);
 394    if (value == NULL) {
 395        parse_error(ctxt, token, "Missing value in dict");
 396        goto out;
 397    }
 398
 399    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
 400
 401    qobject_decref(key);
 402
 403    return 0;
 404
 405out:
 406    parser_context_restore(ctxt, saved_ctxt);
 407    qobject_decref(key);
 408
 409    return -1;
 410}
 411
 412static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
 413{
 414    QDict *dict = NULL;
 415    QObject *token, *peek;
 416    JSONParserContext saved_ctxt = parser_context_save(ctxt);
 417
 418    token = parser_context_pop_token(ctxt);
 419    if (token == NULL) {
 420        goto out;
 421    }
 422
 423    if (!token_is_operator(token, '{')) {
 424        goto out;
 425    }
 426
 427    dict = qdict_new();
 428
 429    peek = parser_context_peek_token(ctxt);
 430    if (peek == NULL) {
 431        parse_error(ctxt, NULL, "premature EOI");
 432        goto out;
 433    }
 434
 435    if (!token_is_operator(peek, '}')) {
 436        if (parse_pair(ctxt, dict, ap) == -1) {
 437            goto out;
 438        }
 439
 440        token = parser_context_pop_token(ctxt);
 441        if (token == NULL) {
 442            parse_error(ctxt, NULL, "premature EOI");
 443            goto out;
 444        }
 445
 446        while (!token_is_operator(token, '}')) {
 447            if (!token_is_operator(token, ',')) {
 448                parse_error(ctxt, token, "expected separator in dict");
 449                goto out;
 450            }
 451
 452            if (parse_pair(ctxt, dict, ap) == -1) {
 453                goto out;
 454            }
 455
 456            token = parser_context_pop_token(ctxt);
 457            if (token == NULL) {
 458                parse_error(ctxt, NULL, "premature EOI");
 459                goto out;
 460            }
 461        }
 462    } else {
 463        (void)parser_context_pop_token(ctxt);
 464    }
 465
 466    return QOBJECT(dict);
 467
 468out:
 469    parser_context_restore(ctxt, saved_ctxt);
 470    QDECREF(dict);
 471    return NULL;
 472}
 473
 474static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
 475{
 476    QList *list = NULL;
 477    QObject *token, *peek;
 478    JSONParserContext saved_ctxt = parser_context_save(ctxt);
 479
 480    token = parser_context_pop_token(ctxt);
 481    if (token == NULL) {
 482        goto out;
 483    }
 484
 485    if (!token_is_operator(token, '[')) {
 486        goto out;
 487    }
 488
 489    list = qlist_new();
 490
 491    peek = parser_context_peek_token(ctxt);
 492    if (peek == NULL) {
 493        parse_error(ctxt, NULL, "premature EOI");
 494        goto out;
 495    }
 496
 497    if (!token_is_operator(peek, ']')) {
 498        QObject *obj;
 499
 500        obj = parse_value(ctxt, ap);
 501        if (obj == NULL) {
 502            parse_error(ctxt, token, "expecting value");
 503            goto out;
 504        }
 505
 506        qlist_append_obj(list, obj);
 507
 508        token = parser_context_pop_token(ctxt);
 509        if (token == NULL) {
 510            parse_error(ctxt, NULL, "premature EOI");
 511            goto out;
 512        }
 513
 514        while (!token_is_operator(token, ']')) {
 515            if (!token_is_operator(token, ',')) {
 516                parse_error(ctxt, token, "expected separator in list");
 517                goto out;
 518            }
 519
 520            obj = parse_value(ctxt, ap);
 521            if (obj == NULL) {
 522                parse_error(ctxt, token, "expecting value");
 523                goto out;
 524            }
 525
 526            qlist_append_obj(list, obj);
 527
 528            token = parser_context_pop_token(ctxt);
 529            if (token == NULL) {
 530                parse_error(ctxt, NULL, "premature EOI");
 531                goto out;
 532            }
 533        }
 534    } else {
 535        (void)parser_context_pop_token(ctxt);
 536    }
 537
 538    return QOBJECT(list);
 539
 540out:
 541    parser_context_restore(ctxt, saved_ctxt);
 542    QDECREF(list);
 543    return NULL;
 544}
 545
 546static QObject *parse_keyword(JSONParserContext *ctxt)
 547{
 548    QObject *token, *ret;
 549    JSONParserContext saved_ctxt = parser_context_save(ctxt);
 550
 551    token = parser_context_pop_token(ctxt);
 552    if (token == NULL) {
 553        goto out;
 554    }
 555
 556    if (token_get_type(token) != JSON_KEYWORD) {
 557        goto out;
 558    }
 559
 560    if (token_is_keyword(token, "true")) {
 561        ret = QOBJECT(qbool_from_int(true));
 562    } else if (token_is_keyword(token, "false")) {
 563        ret = QOBJECT(qbool_from_int(false));
 564    } else {
 565        parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
 566        goto out;
 567    }
 568
 569    return ret;
 570
 571out: 
 572    parser_context_restore(ctxt, saved_ctxt);
 573
 574    return NULL;
 575}
 576
 577static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
 578{
 579    QObject *token = NULL, *obj;
 580    JSONParserContext saved_ctxt = parser_context_save(ctxt);
 581
 582    if (ap == NULL) {
 583        goto out;
 584    }
 585
 586    token = parser_context_pop_token(ctxt);
 587    if (token == NULL) {
 588        goto out;
 589    }
 590
 591    if (token_is_escape(token, "%p")) {
 592        obj = va_arg(*ap, QObject *);
 593    } else if (token_is_escape(token, "%i")) {
 594        obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
 595    } else if (token_is_escape(token, "%d")) {
 596        obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
 597    } else if (token_is_escape(token, "%ld")) {
 598        obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
 599    } else if (token_is_escape(token, "%lld") ||
 600               token_is_escape(token, "%I64d")) {
 601        obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
 602    } else if (token_is_escape(token, "%s")) {
 603        obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
 604    } else if (token_is_escape(token, "%f")) {
 605        obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
 606    } else {
 607        goto out;
 608    }
 609
 610    return obj;
 611
 612out:
 613    parser_context_restore(ctxt, saved_ctxt);
 614
 615    return NULL;
 616}
 617
 618static QObject *parse_literal(JSONParserContext *ctxt)
 619{
 620    QObject *token, *obj;
 621    JSONParserContext saved_ctxt = parser_context_save(ctxt);
 622
 623    token = parser_context_pop_token(ctxt);
 624    if (token == NULL) {
 625        goto out;
 626    }
 627
 628    switch (token_get_type(token)) {
 629    case JSON_STRING:
 630        obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
 631        break;
 632    case JSON_INTEGER: {
 633        /* A possibility exists that this is a whole-valued float where the
 634         * fractional part was left out due to being 0 (.0). It's not a big
 635         * deal to treat these as ints in the parser, so long as users of the
 636         * resulting QObject know to expect a QInt in place of a QFloat in
 637         * cases like these.
 638         *
 639         * However, in some cases these values will overflow/underflow a
 640         * QInt/int64 container, thus we should assume these are to be handled
 641         * as QFloats/doubles rather than silently changing their values.
 642         *
 643         * strtoll() indicates these instances by setting errno to ERANGE
 644         */
 645        int64_t value;
 646
 647        errno = 0; /* strtoll doesn't set errno on success */
 648        value = strtoll(token_get_value(token), NULL, 10);
 649        if (errno != ERANGE) {
 650            obj = QOBJECT(qint_from_int(value));
 651            break;
 652        }
 653        /* fall through to JSON_FLOAT */
 654    }
 655    case JSON_FLOAT:
 656        /* FIXME dependent on locale */
 657        obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
 658        break;
 659    default:
 660        goto out;
 661    }
 662
 663    return obj;
 664
 665out:
 666    parser_context_restore(ctxt, saved_ctxt);
 667
 668    return NULL;
 669}
 670
 671static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
 672{
 673    QObject *obj;
 674
 675    obj = parse_object(ctxt, ap);
 676    if (obj == NULL) {
 677        obj = parse_array(ctxt, ap);
 678    }
 679    if (obj == NULL) {
 680        obj = parse_escape(ctxt, ap);
 681    }
 682    if (obj == NULL) {
 683        obj = parse_keyword(ctxt);
 684    } 
 685    if (obj == NULL) {
 686        obj = parse_literal(ctxt);
 687    }
 688
 689    return obj;
 690}
 691
 692QObject *json_parser_parse(QList *tokens, va_list *ap)
 693{
 694    return json_parser_parse_err(tokens, ap, NULL);
 695}
 696
 697QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
 698{
 699    JSONParserContext *ctxt = parser_context_new(tokens);
 700    QObject *result;
 701
 702    if (!ctxt) {
 703        return NULL;
 704    }
 705
 706    result = parse_value(ctxt, ap);
 707
 708    error_propagate(errp, ctxt->err);
 709
 710    parser_context_free(ctxt);
 711
 712    return result;
 713}
 714