/* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "lib.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "buffer.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "hex-dec.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "unichar.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "istream-private.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#include "istream-jsonstr.h"
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen#define MAX_UTF8_LEN 6
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstruct jsonstr_istream {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct istream_private istream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* The end '"' was found */
0dffa25d211be541ee3c953b23566a1a990789dfTimo Sirainen bool str_end:1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen};
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstatic int
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Siraineni_stream_jsonstr_read_parent(struct jsonstr_istream *jstream,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen unsigned int min_bytes)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct istream_private *stream = &jstream->istream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size_t size, avail;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ssize_t ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size = i_stream_get_data_size(stream->parent);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen while (size < min_bytes) {
bcd286622779a93f809b11993db0550f8c7cc9b5Timo Sirainen ret = i_stream_read_memarea(stream->parent);
81a99eefdfda5781126617d47f8fd6da67bf5543Timo Sirainen if (ret <= 0) {
81a99eefdfda5781126617d47f8fd6da67bf5543Timo Sirainen if (ret == -2) {
81a99eefdfda5781126617d47f8fd6da67bf5543Timo Sirainen /* tiny parent buffer size - shouldn't happen */
81a99eefdfda5781126617d47f8fd6da67bf5543Timo Sirainen return -2;
81a99eefdfda5781126617d47f8fd6da67bf5543Timo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.stream_errno =
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->parent->stream_errno;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.eof = stream->parent->eof;
1547bafeb11368eb80168fe2899283e8bdcb8528Timo Sirainen if (ret == -1 && stream->istream.stream_errno == 0) {
1547bafeb11368eb80168fe2899283e8bdcb8528Timo Sirainen io_stream_set_error(&stream->iostream,
1547bafeb11368eb80168fe2899283e8bdcb8528Timo Sirainen "EOF before trailing <\"> was seen");
1547bafeb11368eb80168fe2899283e8bdcb8528Timo Sirainen stream->istream.stream_errno = EPIPE;
1547bafeb11368eb80168fe2899283e8bdcb8528Timo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size = i_stream_get_data_size(stream->parent);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (!i_stream_try_alloc(stream, size, &avail))
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -2;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstatic int
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomii_stream_json_unescape(const unsigned char *src, size_t len,
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi unsigned char *dest,
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen unsigned int *src_size_r, unsigned int *dest_size_r)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen switch (*src) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case '"':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case '\\':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case '/':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = *src;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'b':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\b';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'f':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\f';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'n':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\n';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'r':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\r';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 't':
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest = '\t';
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen case 'u': {
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi char chbuf[5] = {0};
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi unichar_t chr,chr2 = 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen buffer_t buf;
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi if (len < 5)
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi return 5;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen buffer_create_from_data(&buf, dest, MAX_UTF8_LEN);
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi memcpy(chbuf, src+1, 4);
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (str_to_uint32_hex(chbuf, &chr)<0)
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return -1;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (UTF16_VALID_LOW_SURROGATE(chr))
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return -1;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi /* if we encounter surrogate, we need another \\uxxxx */
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (UTF16_VALID_HIGH_SURROGATE(chr)) {
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (len < 5+2+4)
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return 5+2+4;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (src[5] != '\\' && src[6] != 'u')
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return -1;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi memcpy(chbuf, src+7, 4);
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (str_to_uint32_hex(chbuf, &chr2)<0)
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return -1;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (!UTF16_VALID_LOW_SURROGATE(chr2))
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return -1;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi chr = uni_join_surrogate(chr, chr2);
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi }
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi if (!uni_is_valid_ucs4(chr))
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi return -1;
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi uni_ucs4_to_utf8_c(chr, &buf);
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi *src_size_r = 5 + (chr2>0?6:0);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest_size_r = buf.used;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen default:
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *src_size_r = 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen *dest_size_r = 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstatic ssize_t i_stream_jsonstr_read(struct istream_private *stream)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct jsonstr_istream *jstream = (struct jsonstr_istream *)stream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen const unsigned char *data;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen unsigned int srcskip, destskip, extra;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen size_t i, dest, size;
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi ssize_t ret, ret2;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (jstream->str_end) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.eof = TRUE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ret = i_stream_jsonstr_read_parent(jstream, 1);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret <= 0)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* @UNSAFE */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dest = stream->pos;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen extra = 0;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen data = i_stream_get_data(stream->parent, &size);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen for (i = 0; i < size && dest < stream->buffer_size; ) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (data[i] == '"') {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen jstream->str_end = TRUE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (dest == stream->pos) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.eof = TRUE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen } else if (data[i] == '\\') {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (i+1 == size) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* not enough input for \x */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen extra = 1;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (data[i+1] == 'u' && stream->buffer_size - dest < MAX_UTF8_LEN) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* UTF8 output is max. 6 chars */
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (dest == stream->pos)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -2;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i++;
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi if ((ret2 = i_stream_json_unescape(data + i, size - i,
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi stream->w_buffer + dest,
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi &srcskip, &destskip)) < 0) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* invalid string */
1c6f6f5bef70f16546b3bc8f4cd5f93f373e82a2Timo Sirainen io_stream_set_error(&stream->iostream,
1c6f6f5bef70f16546b3bc8f4cd5f93f373e82a2Timo Sirainen "Invalid JSON string");
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->istream.stream_errno = EINVAL;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return -1;
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi } else if (ret2 > 0) {
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi /* we need to get more bytes, do not consume
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi escape slash */
c3393007354b7ab607449fea0c3d7088193ab208Aki Tuomi i--;
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi extra = ret2;
d9d28856f30b186378e687ab29fa4ba153441cd8Aki Tuomi break;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i += srcskip;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(i <= size);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dest += destskip;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(dest <= stream->buffer_size);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen } else {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->w_buffer[dest++] = data[i];
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i++;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_stream_skip(stream->parent, i);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen ret = dest - stream->pos;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret == 0) {
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen /* not enough input */
076c46c6513161e2c7bbe53d421b0830e9975fb1Timo Sirainen i_assert(i == 0);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(extra > 0);
076c46c6513161e2c7bbe53d421b0830e9975fb1Timo Sirainen ret = i_stream_jsonstr_read_parent(jstream, extra+1);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen if (ret <= 0)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return i_stream_jsonstr_read(stream);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen }
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen i_assert(ret > 0);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen stream->pos = dest;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return ret;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainenstruct istream *i_stream_create_jsonstr(struct istream *input)
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen{
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen struct jsonstr_istream *dstream;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream = i_new(struct jsonstr_istream, 1);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.max_buffer_size = input->real_stream->max_buffer_size;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.read = i_stream_jsonstr_read;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.istream.readable_fd = FALSE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.istream.blocking = input->blocking;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen dstream->istream.istream.seekable = FALSE;
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen return i_stream_create(&dstream->istream, input,
2974dca6be5120e49279f06c8aa952e5fac56048Timo Sirainen i_stream_get_fd(input), 0);
7877db7b5daad125b6cb3e015574f33871c9a51bTimo Sirainen}