json-parser.c revision c3393007354b7ab607449fea0c3d7088193ab208
/* Copyright (c) 2013-2017 Dovecot authors, see the included COPYING file */
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
ee359fe1384507fed6c2274e7bfe81d288de4542Stephen Gallagher#include "lib.h"
33396dc46ea52c18f47db1b5d590880806521005Sumit Bose#include "array.h"
ee359fe1384507fed6c2274e7bfe81d288de4542Stephen Gallagher#include "str.h"
33396dc46ea52c18f47db1b5d590880806521005Sumit Bose#include "istream.h"
703dc1eb5b050b24235a6640f271d34ea008cf98Jan Engelhardt#include "hex-dec.h"
703dc1eb5b050b24235a6640f271d34ea008cf98Jan Engelhardt#include "unichar.h"
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher#include "istream-jsonstr.h"
324fb26ba803a999bedc29e93c46c84f27abf5b7Sumit Bose#include "json-parser.h"
324fb26ba803a999bedc29e93c46c84f27abf5b7Sumit Bose
/* Internal state of the JSON parser state machine. The numeric order of
   the constants is kept exactly as declared. */
enum json_state {
	/* initial state unless JSON_PARSER_NO_ROOT_OBJECT is used:
	   a '{' is required next */
	JSON_STATE_ROOT = 0,

	/* object states; JSON_STATE_OBJECT_OPEN is also the value pushed
	   to the nesting stack when a '{' is opened */
	JSON_STATE_OBJECT_OPEN,
	JSON_STATE_OBJECT_KEY,
	JSON_STATE_OBJECT_COLON,
	JSON_STATE_OBJECT_VALUE,
	JSON_STATE_OBJECT_SKIP_STRING,
	JSON_STATE_OBJECT_NEXT,

	/* array states; JSON_STATE_ARRAY_OPEN is also the value pushed
	   to the nesting stack when a '[' is opened */
	JSON_STATE_ARRAY_OPEN,
	JSON_STATE_ARRAY_VALUE,
	JSON_STATE_ARRAY_SKIP_STRING,
	JSON_STATE_ARRAY_NEXT,
	JSON_STATE_ARRAY_NEXT_SKIP,

	/* initial state when JSON_PARSER_NO_ROOT_OBJECT is used */
	JSON_STATE_VALUE,
	/* the root element has been closed */
	JSON_STATE_DONE
};
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
f0ea3ed816182fadf77f3e7f7ddb298b287007adLukas Slebodnikstruct json_parser {
84ae5edab16ad6be5e3be956cb6fa031c1428eb5Stephen Gallagher struct istream *input;
cc98edd9479d4622634a1275c98058916c14059aStephen Gallagher uoff_t highwater_offset;
ee359fe1384507fed6c2274e7bfe81d288de4542Stephen Gallagher enum json_parser_flags flags;
cc98edd9479d4622634a1275c98058916c14059aStephen Gallagher
d3da1c165cdb4c1ec126a8f4b6b544ca415b9d20Pavel Březina const unsigned char *start, *end, *data;
d3da1c165cdb4c1ec126a8f4b6b544ca415b9d20Pavel Březina const char *error;
d3da1c165cdb4c1ec126a8f4b6b544ca415b9d20Pavel Březina string_t *value;
c481179da5d5b53ce16d8784c0bd2857ffc2f061Lukas Slebodnik struct istream *strinput;
1183d29d87c5c7439cf2364b7d7324d4a13b6e35Stephen Gallagher
002f84aea86371aa079b867c0ec39396b97109d3Lukas Slebodnik enum json_state state;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher ARRAY(enum json_state) nesting;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher unsigned int nested_skip_count;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher bool skipping;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher bool seen_eof;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher};
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherstatic int json_parser_read_more(struct json_parser *parser)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher uoff_t cur_highwater = parser->input->v_offset +
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher i_stream_get_data_size(parser->input);
c89589fa349f38214c9cb8d9389c0fd557e5dca2Simo Sorce size_t size;
c89589fa349f38214c9cb8d9389c0fd557e5dca2Simo Sorce ssize_t ret;
f775337a7d4ca1c0be8eab683d0d753cbaee49e2Lukas Slebodnik
f775337a7d4ca1c0be8eab683d0d753cbaee49e2Lukas Slebodnik i_assert(parser->highwater_offset <= cur_highwater);
f775337a7d4ca1c0be8eab683d0d753cbaee49e2Lukas Slebodnik
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek if (parser->error != NULL)
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek return -1;
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek
c89589fa349f38214c9cb8d9389c0fd557e5dca2Simo Sorce if (parser->highwater_offset == cur_highwater) {
c89589fa349f38214c9cb8d9389c0fd557e5dca2Simo Sorce ret = i_stream_read(parser->input);
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek if (ret == -2) {
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek parser->error = "Token too large";
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek return -1;
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek }
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek if (ret < 0 && !parser->seen_eof &&
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek i_stream_get_data_size(parser->input) > 0 &&
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek parser->input->stream_errno == 0) {
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek /* call it once more to finish any pending number */
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek parser->seen_eof = TRUE;
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek } else if (ret <= 0) {
86b61156743b7ebdc049450a6f88452890fd9a61Jakub Hrozek return ret;
48130eef6c5c64a07094b9e8582ba358b2048f24Jakub Hrozek } else {
48130eef6c5c64a07094b9e8582ba358b2048f24Jakub Hrozek cur_highwater = parser->input->v_offset +
48130eef6c5c64a07094b9e8582ba358b2048f24Jakub Hrozek i_stream_get_data_size(parser->input);
48130eef6c5c64a07094b9e8582ba358b2048f24Jakub Hrozek i_assert(parser->highwater_offset < cur_highwater);
1658c567191c35beaddffafdb079abe33248037bLukas Slebodnik parser->highwater_offset = cur_highwater;
1658c567191c35beaddffafdb079abe33248037bLukas Slebodnik }
1658c567191c35beaddffafdb079abe33248037bLukas Slebodnik }
faa16fc9f0c9a02b26497e7cf148a92586144c08David Disseldorp
faa16fc9f0c9a02b26497e7cf148a92586144c08David Disseldorp parser->start = parser->data = i_stream_get_data(parser->input, &size);
faa16fc9f0c9a02b26497e7cf148a92586144c08David Disseldorp parser->end = parser->start + size;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher i_assert(size > 0);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return 1;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher}
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherstatic void json_parser_update_input_pos(struct json_parser *parser)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher size_t size;
d921c1eba437662437847279f251a0a5d8f70127Maxim
d921c1eba437662437847279f251a0a5d8f70127Maxim if (parser->data == parser->start)
d921c1eba437662437847279f251a0a5d8f70127Maxim return;
d921c1eba437662437847279f251a0a5d8f70127Maxim
d921c1eba437662437847279f251a0a5d8f70127Maxim i_stream_skip(parser->input, parser->data - parser->start);
d921c1eba437662437847279f251a0a5d8f70127Maxim parser->start = parser->data = i_stream_get_data(parser->input, &size);
d921c1eba437662437847279f251a0a5d8f70127Maxim parser->end = parser->start + size;
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer if (size > 0) {
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer /* we skipped over some data and there's still data left.
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer no need to read() the next time. */
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer parser->highwater_offset = 0;
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer } else {
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer parser->highwater_offset = parser->input->v_offset;
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer }
327127bb7fcc07f882209f029e14026de1b23c94Maxim}
327127bb7fcc07f882209f029e14026de1b23c94Maxim
327127bb7fcc07f882209f029e14026de1b23c94Maximstruct json_parser *json_parser_init(struct istream *input)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return json_parser_init_flags(input, 0);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher}
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherstruct json_parser *json_parser_init_flags(struct istream *input,
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher enum json_parser_flags flags)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
eb2e21b764d03544d8161e9956d7f70b07b75f77Simo Sorce struct json_parser *parser;
bc9235cfb80bd64a3bfa959e8d26d5ad1be0bdf4Jakub Hrozek
bc9235cfb80bd64a3bfa959e8d26d5ad1be0bdf4Jakub Hrozek parser = i_new(struct json_parser, 1);
07d82f79d2970a08628ebf71343441ec55faa6faPavel Březina parser->input = input;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->flags = flags;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->value = str_new(default_pool, 128);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher i_array_init(&parser->nesting, 8);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher i_stream_ref(input);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
4b6a0d0b3d42e5fdb457f47d9adfa5e66b160256Stephen Gallagher if ((flags & JSON_PARSER_NO_ROOT_OBJECT) != 0)
90fd1bbd6035cdab46faa3a695a2fb2be6508b17Sumit Bose parser->state = JSON_STATE_VALUE;
03713859dffacc7142393e53c73d8d4cf7dee8d5Pavel Březina return parser;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik}
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik
af4ffe1001adcc0a96897e426d26444f07af9aa1Benjamin Franzkeint json_parser_deinit(struct json_parser **_parser, const char **error_r)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher struct json_parser *parser = *_parser;
64ea4127f463798410a2c20e0261c6b15f60257fJakub Hrozek
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher *_parser = NULL;
772464c842968d6e544118ae1aa7c49a7cda2ad6Stephen Gallagher
32381402a4a9afc003782c9e2301fc59c9bda2a9Yassir Elley if (parser->error != NULL) {
068dbee9ca7bf5b37330eff91c94ae10f288d09fJakub Hrozek /* actual parser error */
98ce3c3e85a4bb2e1822bf8ab2a1c2ab9e3dd61dJakub Hrozek *error_r = parser->error;
be65f065fef1d387281096ef095a2acef39ecc12Jakub Hrozek } else if (parser->input->stream_errno != 0) {
e124844907ed6973915e4d56f5442ecd07535a12Jakub Hrozek *error_r = t_strdup_printf("read(%s) failed: %s",
f36078af138f052cd9a30360867b0ebd0805af5eJakub Hrozek i_stream_get_name(parser->input),
34c78b745eb349eef2b0f13ef2b722632aebe619Jan Cholasta i_stream_get_error(parser->input));
cb4d5b588e704114b7090678752d33512baa718eJakub Hrozek } else if (parser->data == parser->end &&
e07a94a66985b674c5df11ca466792902164c4e2George McCollister !i_stream_have_bytes_left(parser->input) &&
77c0d1f6074059dafd2293f9c42ea0f9d60f8aadJakub Hrozek parser->state != JSON_STATE_DONE) {
a2e417f38c57ed87c956ddcecf4dafca93842b65Lukas Slebodnik *error_r = "Missing '}'";
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer } else {
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer *error_r = NULL;
a9c287bda3fc2a1e12cef2135ade96945f11ad01Sumit Bose }
bc13c352ba9c2877f1e9bc62e55ad60fc000a55dJakub Hrozek
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher i_stream_unref(&parser->input);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher array_free(&parser->nesting);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher str_free(&parser->value);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher i_free(parser);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return *error_r != NULL ? -1 : 0;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher}
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherstatic bool json_parse_whitespace(struct json_parser *parser)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher for (; parser->data != parser->end; parser->data++) {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher switch (*parser->data) {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher case ' ':
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher case '\t':
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher case '\r':
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher case '\n':
a5077712fc8c24e8cad08207b7b5a6603bde6a7cJakub Hrozek break;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher default:
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher json_parser_update_input_pos(parser);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return TRUE;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher json_parser_update_input_pos(parser);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return FALSE;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher}
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozekstatic int json_skip_string(struct json_parser *parser)
b9e5bd09a5ff7009537a18914dbebcf10498f592Sumit Bose{
af4ffe1001adcc0a96897e426d26444f07af9aa1Benjamin Franzke for (; parser->data != parser->end; parser->data++) {
6b0a7c72bb841d6885a620c68bd51d55109b66c7Jakub Hrozek if (*parser->data == '"') {
9917c138d9a270deb5820915384fbde751190c2aLukas Slebodnik parser->data++;
0e1dcef53d9d8465ce97d31ad11be4445a6e7eb8Lukas Slebodnik json_parser_update_input_pos(parser);
c3889e5a101a075defe533d81f5296d5e680f639Lukas Slebodnik return 1;
3fc158e59eebbc2f538fe0076a03928d0d4eab9fPavel Březina }
b9c8ce2bdd4045782c243605a1b999098bedcffcNoam Meltzer if (*parser->data == '\\') {
40b2be4f4312470044cdef460b02b66003f5c85fJakub Hrozek switch (*++parser->data) {
bf54fbed126ec3d459af40ea370ffadacd31c76dJakub Hrozek case '"':
9d453f1e8b28983b363b44c49b7cd701a994fd97Nikolai Kondrashov case '\\':
3fc158e59eebbc2f538fe0076a03928d0d4eab9fPavel Březina case '/':
3fc158e59eebbc2f538fe0076a03928d0d4eab9fPavel Březina case 'b':
3fc158e59eebbc2f538fe0076a03928d0d4eab9fPavel Březina case 'f':
3fc158e59eebbc2f538fe0076a03928d0d4eab9fPavel Březina case 'n':
b32159300fea63222d8dd9200ed634087704ea74Stephen Gallagher case 'r':
b32159300fea63222d8dd9200ed634087704ea74Stephen Gallagher case 't':
b32159300fea63222d8dd9200ed634087704ea74Stephen Gallagher break;
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik case 'u':
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik if (parser->end - parser->data < 4)
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik return -1;
b32159300fea63222d8dd9200ed634087704ea74Stephen Gallagher parser->data += 3;
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik break;
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik default:
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik return -1;
b32159300fea63222d8dd9200ed634087704ea74Stephen Gallagher }
9dbdf62243f01f6aee41c2b5f2976c56da47f25dLukas Slebodnik }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
539b1be3507abdf8ac235b06eeed5011b0b5cde2Ondrej Kos json_parser_update_input_pos(parser);
539b1be3507abdf8ac235b06eeed5011b0b5cde2Ondrej Kos return 0;
574a1c20f114851071ae74112b34488c3d1aeeb3Ondrej Kos}
574a1c20f114851071ae74112b34488c3d1aeeb3Ondrej Kos
574a1c20f114851071ae74112b34488c3d1aeeb3Ondrej Kosstatic int json_parse_unicode_escape(struct json_parser *parser)
574a1c20f114851071ae74112b34488c3d1aeeb3Ondrej Kos{
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozek char chbuf[5] = {0};
e6e26182d58c05d896f72f2925426658a6dc70b5Jakub Hrozek unichar_t chr, hi_surg;
e6e26182d58c05d896f72f2925426658a6dc70b5Jakub Hrozek
9542512d7be40f2000298c86d3d2b728f4f0f65aStephen Gallagher parser->data++;
e6e26182d58c05d896f72f2925426658a6dc70b5Jakub Hrozek if (parser->end - parser->data < 4) {
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozek /* wait for more data */
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->data = parser->end;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return 0;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher memcpy(chbuf, parser->data, 4);
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (str_to_uint32_hex(chbuf, &chr) < 0) {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->error = "Invalid unicode escape seen";
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return -1;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (UTF16_VALID_HIGH_SURROGATE(chr)) {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher /* possible surrogate pair */
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek hi_surg = chr;
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek chr = 0;
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek parser->data += 4;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (parser->data >= parser->end) {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher /* wait for more data */
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->data = parser->end;
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek return 0;
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek }
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek if ((parser->end - parser->data) < 2) {
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek if (parser->data[0] == '\\') {
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek /* wait for more data */
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->data = parser->end;
6b01dae732eedee808f32a9cdd4b5656a9f839c4Jakub Hrozek return 0;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher /* error */
b50dffea929ee5cd0c59ba3c4822337cc162ff92Kamil Dudka }
b50dffea929ee5cd0c59ba3c4822337cc162ff92Kamil Dudka if ((parser->end - parser->data) < 6) {
b50dffea929ee5cd0c59ba3c4822337cc162ff92Kamil Dudka if (parser->data[0] == '\\' &&
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->data[1] == 'u') {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher /* wait for more data */
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek parser->data = parser->end;
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek return 0;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek /* error */
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek } else {
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek memcpy(chbuf, &parser->data[2], 4);
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek if (str_to_uint32_hex(chbuf, &chr) < 0) {
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek parser->error = "Invalid unicode escape seen";
d9378e64499642e86989158f274372187314d5b2Lukas Slebodnik return -1;
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek }
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek }
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek if (parser->data[0] != '\\' || parser->data[1] != 'u' ||
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek !UTF16_VALID_LOW_SURROGATE(chr)) {
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek parser->error =
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek t_strdup_printf("High surrogate 0x%04x seen, "
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek "but not followed by low surrogate",
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek hi_surg);
6f51c802311fd81a409a26763ed45b28a3234d0dJakub Hrozek return -1;
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher chr = uni_join_surrogate(hi_surg, chr);
4d81fe27ced3d2e96866aeaf61661a925cb8edf1Jakub Hrozek parser->data += 2;
8b1f525acd20f36c836e827de3c251088961c5d9Stephen Gallagher }
f5b6f977d4144c28e9c66f3f1c9d634d595d1117Marko Myllynen
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (!uni_is_valid_ucs4(chr)) {
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik parser->error =
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik t_strdup_printf("Invalid unicode character U+%04x", chr);
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik return -1;
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik }
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik uni_ucs4_to_utf8_c(chr, parser->value);
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik parser->data += 3;
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik return 1;
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik}
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnikstatic int json_parse_string(struct json_parser *parser, bool allow_skip,
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik const char **value_r)
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik{
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik int ret;
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik if (*parser->data != '"')
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik return -1;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik parser->data++;
dc4c30bae512c0b45ff925d9e998337f8fe97e94Lukas Slebodnik
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik if (parser->skipping && allow_skip) {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher *value_r = NULL;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik return json_skip_string(parser);
72e60fd4eabcfbcdbfe01e8c38b94052bc6c2067Jakub Hrozek }
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik str_truncate(parser->value, 0);
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik for (; parser->data != parser->end; parser->data++) {
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik if (*parser->data == '"') {
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik parser->data++;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik *value_r = str_c(parser->value);
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik return 1;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik }
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik if (*parser->data != '\\')
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik str_append_c(parser->value, *parser->data);
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik else {
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (++parser->data == parser->end)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return 0;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik switch (*parser->data) {
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik case '"':
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik case '\\':
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik case '/':
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik str_append_c(parser->value, *parser->data);
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik break;
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik case 'b':
827dd342494de18099dddd0272c1a85f10703556Lukas Slebodnik str_append_c(parser->value, '\b');
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik break;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik case 'f':
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik str_append_c(parser->value, '\f');
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik break;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik case 'n':
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik str_append_c(parser->value, '\n');
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik break;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik case 'r':
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik str_append_c(parser->value, '\r');
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik break;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik case 't':
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik str_append_c(parser->value, '\t');
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik break;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik case 'u':
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik if ((ret=json_parse_unicode_escape(parser)) <= 0)
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik return ret;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik break;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik default:
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik return -1;
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik }
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik }
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik }
4a5a18f489f4d19aa0571528a7f0c7a8d35ac83fLukas Slebodnik return 0;
9d453f1e8b28983b363b44c49b7cd701a994fd97Nikolai Kondrashov}
9d453f1e8b28983b363b44c49b7cd701a994fd97Nikolai Kondrashov
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherstatic int
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherjson_parse_digits(struct json_parser *parser)
3d038d2e0dc7af04ec2f7c85ae325accb39f6237Jakub Hrozek{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (parser->data == parser->end)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return 0;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (*parser->data < '0' || *parser->data > '9')
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return -1;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher while (parser->data != parser->end &&
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozek *parser->data >= '0' && *parser->data <= '9')
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozek str_append_c(parser->value, *parser->data++);
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozek return 1;
2a5790216f57e9bdfb2930d52860bb5300366536Jakub Hrozek}
77c0d1f6074059dafd2293f9c42ea0f9d60f8aadJakub Hrozek
77c0d1f6074059dafd2293f9c42ea0f9d60f8aadJakub Hrozekstatic int json_parse_int(struct json_parser *parser)
77c0d1f6074059dafd2293f9c42ea0f9d60f8aadJakub Hrozek{
77c0d1f6074059dafd2293f9c42ea0f9d60f8aadJakub Hrozek int ret;
77c0d1f6074059dafd2293f9c42ea0f9d60f8aadJakub Hrozek
e07a94a66985b674c5df11ca466792902164c4e2George McCollister if (*parser->data == '-') {
e07a94a66985b674c5df11ca466792902164c4e2George McCollister str_append_c(parser->value, *parser->data++);
e07a94a66985b674c5df11ca466792902164c4e2George McCollister if (parser->data == parser->end)
bf01e8179cbb2be476805340636098deda7e1366Sumit Bose return 0;
0d5bb38364a6976e9c85d6349aa13a04d181a090Sumit Bose }
0d5bb38364a6976e9c85d6349aa13a04d181a090Sumit Bose if (*parser->data == '0')
0d5bb38364a6976e9c85d6349aa13a04d181a090Sumit Bose str_append_c(parser->value, *parser->data++);
172c07013d1ea99447a780fd36f49d5c3a76981bJakub Hrozek else {
9917c138d9a270deb5820915384fbde751190c2aLukas Slebodnik if ((ret = json_parse_digits(parser)) <= 0)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return ret;
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce }
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce return 1;
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce}
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce
336879aabae137f9a81304f147fb0d43001654b0Simo Sorcestatic int json_parse_number(struct json_parser *parser, const char **value_r)
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce{
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce int ret;
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce str_truncate(parser->value, 0);
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce if ((ret = json_parse_int(parser)) <= 0)
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce return ret;
336879aabae137f9a81304f147fb0d43001654b0Simo Sorce if (parser->data != parser->end && *parser->data == '.') {
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik /* frac */
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik str_append_c(parser->value, *parser->data++);
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik if ((ret = json_parse_digits(parser)) <= 0)
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik return ret;
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik }
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik if (parser->data != parser->end &&
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik (*parser->data == 'e' || *parser->data == 'E')) {
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik /* exp */
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik str_append_c(parser->value, *parser->data++);
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik if (parser->data == parser->end)
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik return 0;
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik if (*parser->data == '+' || *parser->data == '-')
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik str_append_c(parser->value, *parser->data++);
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik if ((ret = json_parse_digits(parser)) <= 0)
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik return ret;
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik }
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik if (parser->data == parser->end && !parser->input->eof)
aac071824f6c98003f30d49ab440c15b4b53692cLukas Slebodnik return 0;
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik *value_r = str_c(parser->value);
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik return 1;
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik}
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnikstatic int json_parse_atom(struct json_parser *parser, const char *atom)
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik{
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik size_t avail, len = strlen(atom);
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik avail = parser->end - parser->data;
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik if (avail < len) {
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik if (memcmp(parser->data, atom, avail) != 0)
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik return -1;
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik /* everything matches so far, but we need more data */
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik parser->data += avail;
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik return 0;
356eef72675cde4dc5627c1e2f1a01846ec6eb1dLukas Slebodnik }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher if (memcmp(parser->data, atom, len) != 0)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return -1;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->data += len;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher return 1;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher}
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagherstatic int json_parse_denest(struct json_parser *parser)
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher{
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher const enum json_state *nested_states;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher unsigned count;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
1c7f25390572025baa6783ede14523e22fc73043Lukas Slebodnik parser->data++;
40b2be4f4312470044cdef460b02b66003f5c85fJakub Hrozek json_parser_update_input_pos(parser);
40b2be4f4312470044cdef460b02b66003f5c85fJakub Hrozek
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher nested_states = array_get(&parser->nesting, &count);
9d453f1e8b28983b363b44c49b7cd701a994fd97Nikolai Kondrashov i_assert(count > 0);
9d453f1e8b28983b363b44c49b7cd701a994fd97Nikolai Kondrashov if (count == 1) {
8c294c1cd4d721818a59684cf7f2b36123f79163Stephen Gallagher /* closing root */
8c294c1cd4d721818a59684cf7f2b36123f79163Stephen Gallagher parser->state = JSON_STATE_DONE;
5484044ea7bb632b915f706685fce509f6eacc48Jakub Hrozek if ((parser->flags & JSON_PARSER_NO_ROOT_OBJECT) == 0)
5484044ea7bb632b915f706685fce509f6eacc48Jakub Hrozek return 0;
5484044ea7bb632b915f706685fce509f6eacc48Jakub Hrozek /* we want to return the ending "]" or "}" to caller */
5484044ea7bb632b915f706685fce509f6eacc48Jakub Hrozek return 1;
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher }
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher /* closing a nested object */
551aa6c36797ed720487f5974dcadabf19e6ff9fStephen Gallagher parser->state = nested_states[count-2] == JSON_STATE_OBJECT_OPEN ?
a8d887323f83984679a7d9b827a70146656bb7b2Sumit Bose JSON_STATE_OBJECT_NEXT : JSON_STATE_ARRAY_NEXT;
a8d887323f83984679a7d9b827a70146656bb7b2Sumit Bose array_delete(&parser->nesting, count-1, 1);
a8d887323f83984679a7d9b827a70146656bb7b2Sumit Bose
96c73559adfbdac96720008fc022cb1d540b53c3Jakub Hrozek if (parser->nested_skip_count > 0) {
3be9e26dcd169d44ae105f1b8a0674464c700b77Sumit Bose parser->nested_skip_count--;
9542512d7be40f2000298c86d3d2b728f4f0f65aStephen Gallagher return 0;
428db8a58c0c149d5efccc6d788f70916c1d34d7Jakub Hrozek }
9d453f1e8b28983b363b44c49b7cd701a994fd97Nikolai Kondrashov return 1;
70e59ed31c5a9c9ed02d9065ddf92be87c887efbJakub Hrozek}
a6098862048d4bb469130b9ff21be3020d6f2c54Sumit Bose
2a9af1f71887f02935e2fb6ad5023afba5b6d43eSumit Bosestatic int
2a9af1f71887f02935e2fb6ad5023afba5b6d43eSumit Bosejson_parse_close_object(struct json_parser *parser, enum json_type *type_r)
f3c85d900c4663854cc7bbae7d9f77867ed1f69bSumit Bose{
a7e27c11866a48742bb70564b88e15bf15e9367dPavel Březina if (json_parse_denest(parser) == 0)
a7e27c11866a48742bb70564b88e15bf15e9367dPavel Březina return 0;
f1ce53a3b5656361557f80f61dfd42a371230c65Stephen Gallagher *type_r = JSON_TYPE_OBJECT_END;
1a59af8245f183f22d87d067a90197d8e2ea958dJakub Hrozek return 1;
f1ce53a3b5656361557f80f61dfd42a371230c65Stephen Gallagher}
84ae5edab16ad6be5e3be956cb6fa031c1428eb5Stephen Gallagher
/* Handle a ']' at the current position: pop one nesting level and, unless
   json_parse_denest() says the close must not be reported (it returns 0),
   report JSON_TYPE_ARRAY_END via *type_r and return 1. */
static int
json_parse_close_array(struct json_parser *parser, enum json_type *type_r)
{
	if (json_parse_denest(parser) != 0) {
		*type_r = JSON_TYPE_ARRAY_END;
		return 1;
	}
	return 0;
}
/* Enter a new object: callers invoke this with parser->data pointing at the
   opening '{'. The new OBJECT_OPEN state is pushed onto the nesting stack,
   the '{' is consumed and the input position is updated. */
static void json_parser_object_open(struct json_parser *parser)
{
	/* record the new nesting level */
	parser->state = JSON_STATE_OBJECT_OPEN;
	array_append(&parser->nesting, &parser->state, 1);

	/* step over the '{' */
	parser->data++;
	json_parser_update_input_pos(parser);
}
/* Run one step of the parser state machine on the currently buffered input.

   Returns:
     1  - a token was produced: *type_r is set, and *value_r is set for
          keys, strings, numbers and atoms.
     0  - the state advanced (or a skipped value was consumed) without
          producing a token; the caller is expected to call again.
    -1  - the step failed. This is either a syntax error (parser->error is
          usually set) or simply a lack of buffered input; json_parse_next()
          tells these apart by checking whether parser->data reached
          parser->end. */
static int
json_try_parse_next(struct json_parser *parser, enum json_type *type_r,
		    const char **value_r)
{
	/* Remember the skip flag as it was on entry: a key/value parsed while
	   skipping is consumed but must not be reported (see final return). */
	bool skipping = parser->skipping;
	int ret;

	if (!json_parse_whitespace(parser))
		return -1;

	switch (parser->state) {
	case JSON_STATE_ROOT:
		if (*parser->data != '{') {
			parser->error = "Object doesn't begin with '{'";
			return -1;
		}
		json_parser_object_open(parser);
		return 0;
	case JSON_STATE_OBJECT_VALUE:
	case JSON_STATE_ARRAY_VALUE:
	case JSON_STATE_VALUE:
		if (*parser->data == '{') {
			json_parser_object_open(parser);
			if (parser->skipping) {
				/* nested container inside a skipped value:
				   count it so the matching close isn't
				   reported either */
				parser->nested_skip_count++;
				return 0;
			}
			*type_r = JSON_TYPE_OBJECT;
			return 1;
		} else if (*parser->data == '[') {
			parser->data++;
			parser->state = JSON_STATE_ARRAY_OPEN;
			array_append(&parser->nesting, &parser->state, 1);
			json_parser_update_input_pos(parser);

			if (parser->skipping) {
				parser->nested_skip_count++;
				return 0;
			}
			*type_r = JSON_TYPE_ARRAY;
			return 1;
		}

		/* scalar value: try each kind in turn; a negative return
		   means "not this kind", so the next one is attempted */
		if ((ret = json_parse_string(parser, TRUE, value_r)) >= 0) {
			*type_r = JSON_TYPE_STRING;
		} else if ((ret = json_parse_number(parser, value_r)) >= 0) {
			*type_r = JSON_TYPE_NUMBER;
		} else if ((ret = json_parse_atom(parser, "true")) >= 0) {
			*type_r = JSON_TYPE_TRUE;
			*value_r = "true";
		} else if ((ret = json_parse_atom(parser, "false")) >= 0) {
			*type_r = JSON_TYPE_FALSE;
			*value_r = "false";
		} else if ((ret = json_parse_atom(parser, "null")) >= 0) {
			*type_r = JSON_TYPE_NULL;
			*value_r = NULL;
		} else {
			if (parser->error == NULL)
				parser->error = "Invalid data as value";
			return -1;
		}
		if (ret == 0) {
			/* the value was recognized but is incomplete in the
			   buffered input */
			i_assert(parser->data == parser->end);
			if (parser->skipping && *type_r == JSON_TYPE_STRING) {
				/* a large string that we want to skip over. */
				json_parser_update_input_pos(parser);
				parser->state = parser->state == JSON_STATE_OBJECT_VALUE ?
					JSON_STATE_OBJECT_SKIP_STRING :
					JSON_STATE_ARRAY_SKIP_STRING;
				return 0;
			}
			return -1;
		}
		/* complete scalar parsed: move to the state that follows a
		   value in the current context */
		switch (parser->state) {
		case JSON_STATE_OBJECT_VALUE:
			parser->state = JSON_STATE_OBJECT_NEXT;
			break;
		case JSON_STATE_ARRAY_VALUE:
			parser->state = JSON_STATE_ARRAY_NEXT;
			break;
		case JSON_STATE_VALUE:
			parser->state = JSON_STATE_DONE;
			break;
		default:
			i_unreached();
		}
		break;
	case JSON_STATE_OBJECT_OPEN:
		if (*parser->data == '}')
			return json_parse_close_object(parser, type_r);
		parser->state = JSON_STATE_OBJECT_KEY;
		/* fall through */
	case JSON_STATE_OBJECT_KEY:
		if (json_parse_string(parser, FALSE, value_r) <= 0) {
			parser->error = "Expected string as object key";
			return -1;
		}
		*type_r = JSON_TYPE_OBJECT_KEY;
		parser->state = JSON_STATE_OBJECT_COLON;
		break;
	case JSON_STATE_OBJECT_COLON:
		if (*parser->data != ':') {
			parser->error = "Expected ':' after key";
			return -1;
		}
		parser->data++;
		parser->state = JSON_STATE_OBJECT_VALUE;
		json_parser_update_input_pos(parser);
		return 0;
	case JSON_STATE_OBJECT_NEXT:
		if (parser->skipping && parser->nested_skip_count == 0) {
			/* we skipped over the previous value */
			parser->skipping = FALSE;
		}
		if (*parser->data == '}')
			return json_parse_close_object(parser, type_r);
		if (*parser->data != ',') {
			parser->error = "Expected ',' or '}' after object value";
			return -1;
		}
		parser->state = JSON_STATE_OBJECT_KEY;
		parser->data++;
		json_parser_update_input_pos(parser);
		return 0;
	case JSON_STATE_ARRAY_OPEN:
		if (*parser->data == ']')
			return json_parse_close_array(parser, type_r);
		parser->state = JSON_STATE_ARRAY_VALUE;
		return 0;
	case JSON_STATE_ARRAY_NEXT:
		if (parser->skipping && parser->nested_skip_count == 0) {
			/* we skipped over the previous value */
			parser->skipping = FALSE;
		}
		/* fall through */
	case JSON_STATE_ARRAY_NEXT_SKIP:
		/* entered directly when json_parse_skip_next() was called in
		   ARRAY_NEXT state: the skip flag must stay set here */
		if (*parser->data == ']')
			return json_parse_close_array(parser, type_r);
		if (*parser->data != ',') {
			/* arrays are terminated by ']', not '}' */
			parser->error = "Expected ',' or ']' after array value";
			return -1;
		}
		parser->state = JSON_STATE_ARRAY_VALUE;
		parser->data++;
		json_parser_update_input_pos(parser);
		return 0;
	case JSON_STATE_OBJECT_SKIP_STRING:
	case JSON_STATE_ARRAY_SKIP_STRING:
		if (json_skip_string(parser) <= 0)
			return -1;
		parser->state = parser->state == JSON_STATE_OBJECT_SKIP_STRING ?
			JSON_STATE_OBJECT_NEXT : JSON_STATE_ARRAY_NEXT;
		return 0;
	case JSON_STATE_DONE:
		parser->error = "Unexpected data at the end";
		return -1;
	}
	/* only the key and scalar-value paths reach this point */
	json_parser_update_input_pos(parser);
	return skipping ? 0 : 1;
}
/* Parse the next token from the parser's input.

   Returns a positive value when a token was stored in *type_r / *value_r,
   -1 when parsing failed with unconsumed data still buffered, and otherwise
   whatever non-positive value json_parser_read_more() returned. Must not be
   called while a string stream (parser->strinput) is still open. */
int json_parse_next(struct json_parser *parser, enum json_type *type_r,
		    const char **value_r)
{
	int ret;

	i_assert(parser->strinput == NULL);

	*value_r = NULL;

	for (;;) {
		ret = json_parser_read_more(parser);
		if (ret <= 0)
			return ret;

		/* keep stepping for as long as the state machine advances
		   without producing a token */
		do {
			ret = json_try_parse_next(parser, type_r, value_r);
		} while (ret == 0);
		if (ret > 0)
			return ret;

		if (parser->data != parser->end)
			return -1;
		/* parsing probably failed because there wasn't enough input.
		   reset the error and try reading more. */
		parser->error = NULL;
		parser->highwater_offset = parser->input->v_offset +
			i_stream_get_data_size(parser->input);
	}
}
/* Ask the parser to skip over the next value instead of returning it.
   Only valid when not already skipping, when no string stream is open, and
   when the parser is positioned right before a value (or right after an
   array value). */
void json_parse_skip_next(struct json_parser *parser)
{
	i_assert(!parser->skipping);
	i_assert(parser->strinput == NULL);
	i_assert(parser->state == JSON_STATE_OBJECT_COLON ||
		 parser->state == JSON_STATE_OBJECT_VALUE ||
		 parser->state == JSON_STATE_ARRAY_VALUE ||
		 parser->state == JSON_STATE_ARRAY_NEXT);

	/* In ARRAY_NEXT the state machine would clear the skipping flag right
	   away, so switch to the variant state that leaves it set. */
	if (parser->state == JSON_STATE_ARRAY_NEXT)
		parser->state = JSON_STATE_ARRAY_NEXT_SKIP;
	parser->skipping = TRUE;
}
/* Destroy callback registered on parser->strinput by
   json_try_parse_stream_start(). Clears the parser's reference to the
   stream; the public parse functions assert that it is NULL before they
   run. */
static void json_strinput_destroyed(struct json_parser *parser)
{
i_assert(parser->strinput != NULL);
parser->strinput = NULL;
}
/* Try to begin reading the next value as a string stream.

   Skips whitespace (and the ':' separator when the parser is still in
   OBJECT_COLON state), then expects an opening '"'. On success a jsonstr
   istream wrapping parser->input is created, remembered in parser->strinput,
   returned in *input_r, and 0 is returned. Returns -1 when input ran out or
   the next token is not a string; parser->error is set only for a missing
   ':'. */
static int
json_try_parse_stream_start(struct json_parser *parser,
			    struct istream **input_r)
{
	struct istream *strinput;

	if (!json_parse_whitespace(parser))
		return -1;

	if (parser->state == JSON_STATE_OBJECT_COLON) {
		if (*parser->data != ':') {
			parser->error = "Expected ':' after key";
			return -1;
		}
		parser->data++;
		parser->state = JSON_STATE_OBJECT_VALUE;
		if (!json_parse_whitespace(parser))
			return -1;
	}

	if (*parser->data != '"')
		return -1;
	/* consume the opening quote */
	parser->data++;
	json_parser_update_input_pos(parser);

	/* the parser resumes in a skip-string state once the stream is gone */
	if (parser->state == JSON_STATE_OBJECT_VALUE)
		parser->state = JSON_STATE_OBJECT_SKIP_STRING;
	else
		parser->state = JSON_STATE_ARRAY_SKIP_STRING;

	strinput = i_stream_create_jsonstr(parser->input);
	parser->strinput = strinput;
	i_stream_add_destroy_callback(strinput,
				      json_strinput_destroyed, parser);

	*input_r = strinput;
	return 0;
}
/* Return the next value as an input stream in *input_r if it is a string.

   Returns the positive result of json_parser_read_more() once the stream
   has been started, -1 if the stream could not be started while unconsumed
   data is still buffered, and otherwise whatever non-positive value
   json_parser_read_more() returned. *input_r is NULL unless a stream was
   started. */
int json_parse_next_stream(struct json_parser *parser,
			   struct istream **input_r)
{
	int ret;

	i_assert(!parser->skipping);
	i_assert(parser->strinput == NULL);
	i_assert(parser->state == JSON_STATE_OBJECT_COLON ||
		 parser->state == JSON_STATE_OBJECT_VALUE ||
		 parser->state == JSON_STATE_ARRAY_VALUE);

	*input_r = NULL;

	for (;;) {
		ret = json_parser_read_more(parser);
		if (ret <= 0)
			return ret;

		if (json_try_parse_stream_start(parser, input_r) == 0)
			return ret;

		if (parser->data != parser->end)
			return -1;
		/* parsing probably failed because there wasn't enough input.
		   reset the error and try reading more. */
		parser->error = NULL;
		parser->highwater_offset = parser->input->v_offset +
			i_stream_get_data_size(parser->input);
	}
}
/* Append a single byte to dest in JSON-escaped form. Bytes with a short
   escape (\b \f \n \r \t \" \\) use it; other bytes below 0x20 or at/above
   0x80 are written as \uXXXX; everything else is appended unchanged. */
static void json_append_escaped_char(string_t *dest, unsigned char src)
{
	const char *short_escape = NULL;

	switch (src) {
	case '\b':
		short_escape = "\\b";
		break;
	case '\f':
		short_escape = "\\f";
		break;
	case '\n':
		short_escape = "\\n";
		break;
	case '\r':
		short_escape = "\\r";
		break;
	case '\t':
		short_escape = "\\t";
		break;
	case '"':
		short_escape = "\\\"";
		break;
	case '\\':
		short_escape = "\\\\";
		break;
	default:
		break;
	}

	if (short_escape != NULL) {
		str_append(dest, short_escape);
		return;
	}
	if (src >= 0x20 && src < 0x80) {
		/* plain printable byte */
		str_append_c(dest, src);
		return;
	}
	str_printfa(dest, "\\u%04x", src);
}
/* Append one Unicode code point to dest in JSON-escaped form. Code points
   below 0x80 go through the single-byte escaper; code points at or above
   UTF16_SURROGATE_BASE are split with uni_split_surrogate() and written as
   two \uXXXX escapes; anything in between becomes a single \uXXXX. */
void json_append_escaped_ucs4(string_t *dest, unichar_t chr)
{
	unichar_t high, low;

	if (chr < 0x80) {
		json_append_escaped_char(dest, (unsigned char)chr);
		return;
	}
	if (chr < UTF16_SURROGATE_BASE) {
		str_printfa(dest, "\\u%04x", chr);
		return;
	}
	uni_split_surrogate(chr, &high, &low);
	str_printfa(dest, "\\u%04x\\u%04x", high, low);
}
/* Non-static wrapper around json_append_escaped_char(): appends the single
   byte src to dest in JSON-escaped form. */
void ostream_escaped_json_format(string_t *dest, unsigned char src)
{
json_append_escaped_char(dest, src);
}
/* Append the NUL-terminated string src to dest in JSON-escaped form.
   Delegates to json_append_escaped_data() with strlen(src) as the size. */
void json_append_escaped(string_t *dest, const char *src)
{
	const unsigned char *data = (const unsigned char*)src;
	size_t size = strlen(src);

	json_append_escaped_data(dest, data, size);
}
/* Append size bytes of src to dest in JSON-escaped form. The input is
   decoded one character at a time with uni_utf8_get_char_n(); each decoded
   code point is written with json_append_escaped_ucs4(). Input that fails to
   decode, or decodes to an invalid code point, trips the assertion. */
void json_append_escaped_data(string_t *dest, const unsigned char *src, size_t size)
{
	size_t pos = 0;
	unichar_t chr;
	int bytes;

	while (pos < size) {
		bytes = uni_utf8_get_char_n(src + pos, size - pos, &chr);
		/* refuse to add invalid data */
		i_assert(bytes > 0 && uni_is_valid_ucs4(chr));
		json_append_escaped_ucs4(dest, chr);
		pos += bytes;
	}
}