istream-jsonstr.c revision 076c46c6513161e2c7bbe53d421b0830e9975fb1
02c335c23bf5fa225a467c19f2c063fb0dc7b8c3Timo Sirainen/* Copyright (c) 2013-2016 Dovecot authors, see the included COPYING file */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "lib.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "buffer.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "hex-dec.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "unichar.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "istream-private.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "istream-jsonstr.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#define MAX_UTF8_LEN 6
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstruct jsonstr_istream {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct istream_private istream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* The end '"' was found */
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen bool str_end:1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen};
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstatic int
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Siraineni_stream_jsonstr_read_parent(struct jsonstr_istream *jstream,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen unsigned int min_bytes)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct istream_private *stream = &jstream->istream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size_t size, avail;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ssize_t ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size = i_stream_get_data_size(stream->parent);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen while (size < min_bytes) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ret = i_stream_read(stream->parent);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret <= 0 && (ret != -2 || stream->skip == 0)) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.stream_errno =
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->parent->stream_errno;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.eof = stream->parent->eof;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size = i_stream_get_data_size(stream->parent);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (!i_stream_try_alloc(stream, size, &avail))
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -2;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstatic int
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Siraineni_stream_json_unescape(const unsigned char *src, unsigned char *dest,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen unsigned int *src_size_r, unsigned int *dest_size_r)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen switch (*src) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case '"':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case '\\':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case '/':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = *src;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'b':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\b';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'f':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\f';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'n':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\n';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'r':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\r';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 't':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\t';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'u': {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen buffer_t buf;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen buffer_create_from_data(&buf, dest, MAX_UTF8_LEN);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen uni_ucs4_to_utf8_c(hex2dec(src+1, 4), &buf);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *src_size_r = 5;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest_size_r = buf.used;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen default:
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *src_size_r = 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest_size_r = 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstatic ssize_t i_stream_jsonstr_read(struct istream_private *stream)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct jsonstr_istream *jstream = (struct jsonstr_istream *)stream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen const unsigned char *data;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen unsigned int srcskip, destskip, extra;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size_t i, dest, size;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ssize_t ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (jstream->str_end) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.eof = TRUE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ret = i_stream_jsonstr_read_parent(jstream, 1);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret <= 0)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* @UNSAFE */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dest = stream->pos;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen extra = 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen data = i_stream_get_data(stream->parent, &size);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen for (i = 0; i < size && dest < stream->buffer_size; ) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (data[i] == '"') {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen jstream->str_end = TRUE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (dest == stream->pos) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.eof = TRUE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen } else if (data[i] == '\\') {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (i+1 == size) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* not enough input for \x */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen extra = 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if ((data[i+1] == 'u' && i+1+4 >= size)) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* not enough input for \u0000 */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen extra = 5;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (data[i+1] == 'u' && stream->buffer_size - dest < MAX_UTF8_LEN) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* UTF8 output is max. 6 chars */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (dest == stream->pos)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -2;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i++;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (i_stream_json_unescape(data + i,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->w_buffer + dest,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen &srcskip, &destskip) < 0) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* invalid string */
1c6f6f5bef70f16546b3bc8f4cd5f93f373e82a2Timo Sirainen io_stream_set_error(&stream->iostream,
1c6f6f5bef70f16546b3bc8f4cd5f93f373e82a2Timo Sirainen "Invalid JSON string");
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.stream_errno = EINVAL;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i += srcskip;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(i <= size);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dest += destskip;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(dest <= stream->buffer_size);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen } else {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->w_buffer[dest++] = data[i];
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i++;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_stream_skip(stream->parent, i);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ret = dest - stream->pos;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret == 0) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* not enough input */
076c46c6513161e2c7bbe53d421b0830e9975fb1Timo Sirainen i_assert(i == 0);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(extra > 0);
076c46c6513161e2c7bbe53d421b0830e9975fb1Timo Sirainen ret = i_stream_jsonstr_read_parent(jstream, extra+1);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret <= 0)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return i_stream_jsonstr_read(stream);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(ret > 0);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->pos = dest;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstruct istream *i_stream_create_jsonstr(struct istream *input)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct jsonstr_istream *dstream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream = i_new(struct jsonstr_istream, 1);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.max_buffer_size = input->real_stream->max_buffer_size;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.read = i_stream_jsonstr_read;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.istream.readable_fd = FALSE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.istream.blocking = input->blocking;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.istream.seekable = FALSE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return i_stream_create(&dstream->istream, input,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_stream_get_fd(input));
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}