message-header-parser.c revision 6cb3c4f4276531258be706821e034f1f0a8cd276
/* Copyright (c) 2002-2012 Dovecot authors, see the included COPYING file */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen#include "lib.h"
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen#include "buffer.h"
0536ccb51d41e3078c3a9fa33e509fb4b2420f95Timo Sirainen#include "istream.h"
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen#include "str.h"
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen#include "message-size.h"
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen#include "message-header-parser.h"
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainenstruct message_header_parser_ctx {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen struct message_header_line line;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen struct istream *input;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen struct message_size *hdr_size;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen string_t *name;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen buffer_t *value_buf;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size_t skip;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen enum message_header_parser_flags flags;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen unsigned int skip_line:1;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen unsigned int has_nuls:1;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen};
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainenstruct message_header_parser_ctx *
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainenmessage_parse_header_init(struct istream *input, struct message_size *hdr_size,
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen enum message_header_parser_flags flags)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen{
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen struct message_header_parser_ctx *ctx;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx = i_new(struct message_header_parser_ctx, 1);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->input = input;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size = hdr_size;
5a0ac2e5ef482016e00575a7dce83f52c1704732Timo Sirainen ctx->name = str_new(default_pool, 128);
5a0ac2e5ef482016e00575a7dce83f52c1704732Timo Sirainen ctx->flags = flags;
7823ef73e51bb81a17dcb306aff89016d4ce258fTimo Sirainen ctx->value_buf = buffer_create_dynamic(default_pool, 4096);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (hdr_size != NULL)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen memset(hdr_size, 0, sizeof(*hdr_size));
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen return ctx;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen}
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainenvoid message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen{
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen struct message_header_parser_ctx *ctx = *_ctx;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i_stream_skip(ctx->input, ctx->skip);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen buffer_free(&ctx->value_buf);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen str_free(&ctx->name);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i_free(ctx);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen *_ctx = NULL;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen}
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainenint message_parse_header_next(struct message_header_parser_ctx *ctx,
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen struct message_header_line **hdr_r)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen{
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen struct message_header_line *line = &ctx->line;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen const unsigned char *msg;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size_t i, size, startpos, colon_pos, parse_size;
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen int ret;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen bool continued, continues, last_no_newline, last_crlf;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen bool no_newline, crlf_newline;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen *hdr_r = NULL;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (line->eoh)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen return -1;
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->skip > 0) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i_stream_skip(ctx->input, ctx->skip);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->skip = 0;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen }
64055bc6d2ed9e25b3b1db3b5b90d0bdb77cd715Timo Sirainen
64055bc6d2ed9e25b3b1db3b5b90d0bdb77cd715Timo Sirainen if (line->continues)
64055bc6d2ed9e25b3b1db3b5b90d0bdb77cd715Timo Sirainen colon_pos = 0;
64055bc6d2ed9e25b3b1db3b5b90d0bdb77cd715Timo Sirainen else {
64055bc6d2ed9e25b3b1db3b5b90d0bdb77cd715Timo Sirainen /* new header line */
64055bc6d2ed9e25b3b1db3b5b90d0bdb77cd715Timo Sirainen line->name_offset = ctx->input->v_offset;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen colon_pos = UINT_MAX;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen buffer_set_used_size(ctx->value_buf, 0);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen no_newline = FALSE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen crlf_newline = FALSE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continued = line->continues;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continues = FALSE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen for (startpos = 0;;) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ret = i_stream_read_data(ctx->input, &msg, &size, startpos+1);
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (ret >= 0) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* we want to know one byte in advance to find out
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if it's multiline header */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen parse_size = size == 0 ? 0 : size-1;
7026c16186f543e11af12b8b87f396006db93297Timo Sirainen } else {
7026c16186f543e11af12b8b87f396006db93297Timo Sirainen parse_size = size;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
7026c16186f543e11af12b8b87f396006db93297Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ret <= 0 && startpos == parse_size) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (ret == -1) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (startpos > 0) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen /* header ended unexpectedly. */
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen no_newline = TRUE;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen ctx->skip = startpos;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen break;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen }
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen /* error / EOF with no bytes */
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen return -1;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen }
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (size > 0 && !ctx->skip_line &&
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen (msg[0] == '\n' ||
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen (msg[0] == '\r' && size > 1 && msg[1] == '\n'))) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen /* end of headers - this mostly happens just
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen with mbox where headers are read separately
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen from body */
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen size = 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->hdr_size != NULL)
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen ctx->hdr_size->lines++;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[0] == '\r') {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen ctx->skip = 2;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen crlf_newline = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen } else {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->skip = 1;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (ctx->hdr_size != NULL)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->virtual_size++;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
64c48ffb71f1cf99acf375768fde4cff9b512648Timo Sirainen if (ret == 0 && !ctx->input->eof) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* stream is nonblocking - need more data */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen return 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i_assert(size > 0);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* a) line is larger than input buffer
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen b) header ended unexpectedly */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (colon_pos == UINT_MAX && ret == -2 && !continued) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen /* header name is huge. just skip it. */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i_assert(size > 1);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[size-1] == '\r')
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size--;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen if (ctx->hdr_size != NULL) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->physical_size += size;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->virtual_size += size;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen i_stream_skip(ctx->input, size);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->skip_line = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen startpos = 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continue;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ret == -2) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* go back to last LWSP if found. */
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen size_t min_pos = !continued ? colon_pos : 0;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen for (i = size-1; i > min_pos; i--) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (IS_LWSP(msg[i])) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen size = i;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen }
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen if (i == min_pos && (msg[size-1] == '\r' ||
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen msg[size-1] == '\n')) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen /* we may or may not have a full header,
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen but we don't know until we get the
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen next character. leave out the
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen linefeed and finish the header on
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen the next run. */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size--;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (size > 0 && msg[size-1] == '\r')
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size--;
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continues = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen no_newline = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->skip = size;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* find ':' */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (colon_pos == UINT_MAX) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen for (i = startpos; i < parse_size; i++) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] > ':')
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continue;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] == ':' && !ctx->skip_line) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen colon_pos = i;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->full_value_offset =
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->input->v_offset + i + 1;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] == '\n') {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* end of headers, or error */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] == '\0')
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->has_nuls = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen } else {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i = startpos;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* find '\n' */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen for (; i < parse_size; i++) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] <= '\n') {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] == '\n')
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (msg[i] == '\0')
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->has_nuls = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen if (i < parse_size && i+1 == size && ret == -2) {
beffc30d933c5e134c45cc871852a8427eba7e70Timo Sirainen /* we don't know if the line continues. */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i++;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen } else if (i < parse_size) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* got a line */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->skip_line) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* skipping a line with a huge header name */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->hdr_size != NULL) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->lines++;
7026c16186f543e11af12b8b87f396006db93297Timo Sirainen ctx->hdr_size->physical_size += i + 1;
7026c16186f543e11af12b8b87f396006db93297Timo Sirainen ctx->hdr_size->virtual_size += i + 1;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (i == 0 || msg[i-1] != '\r') {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* missing CR */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->hdr_size != NULL)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->virtual_size++;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen i_stream_skip(ctx->input, i + 1);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen startpos = 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->skip_line = FALSE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continue;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen continues = i+1 < size && IS_LWSP(msg[i+1]);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->hdr_size != NULL)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->lines++;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (i == 0 || msg[i-1] != '\r') {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* missing CR */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (ctx->hdr_size != NULL)
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen ctx->hdr_size->virtual_size++;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size = i;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen } else {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen size = i-1;
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen crlf_newline = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen ctx->skip = i+1;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen break;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen startpos = i;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen }
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen last_crlf = line->crlf_newline &&
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_DROP_CR) == 0;
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen last_no_newline = line->no_newline ||
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->continues = continues;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->continued = continued;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->crlf_newline = crlf_newline;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->no_newline = no_newline;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen if (size == 0) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* end of headers */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->eoh = TRUE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->name_len = line->value_len = line->full_value_len = 0;
4ce6338bf945cccfff9e4ce7cc6aa2246851b84aTimo Sirainen line->name = ""; line->value = line->full_value = NULL;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->middle = NULL; line->middle_len = 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->full_value_offset = line->name_offset;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->continues = FALSE;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen } else if (line->continued) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->value = msg;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->value_len = size;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen } else if (colon_pos == UINT_MAX) {
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen /* missing ':', assume the whole line is name */
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->value = NULL;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen line->value_len = 0;
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
da2aa032ccfa8e7e4a4380ef738014549f4d2c2dTimo Sirainen str_truncate(ctx->name, 0);
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen buffer_append(ctx->name, msg, size);
ab7b5b9286104974c2a572a499ccf8b56c5d2955Timo Sirainen line->name = str_c(ctx->name);
9511a40d933181045343110c8101b75887062aaeTimo Sirainen line->name_len = str_len(ctx->name);
f89cb43088c8b46d12d66ac924724b53ab14ce66Timo Sirainen
line->middle = NULL;
line->middle_len = 0;
} else {
size_t pos;
line->value = msg + colon_pos+1;
line->value_len = size - colon_pos - 1;
if (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP) {
/* get value. skip all LWSP after ':'. Note that
RFC2822 doesn't say we should, but history behind
it..
Exception to this is if the value consists only of
LWSP, then skip only the one LWSP after ':'. */
for (pos = 0; pos < line->value_len; pos++) {
if (!IS_LWSP(line->value[pos]))
break;
}
if (pos == line->value_len) {
/* everything was LWSP */
if (line->value_len > 0 &&
IS_LWSP(line->value[0]))
pos = 1;
}
} else {
pos = line->value_len > 0 &&
IS_LWSP(line->value[0]) ? 1 : 0;
}
line->value += pos;
line->value_len -= pos;
line->full_value_offset += pos;
/* get name, skip LWSP before ':' */
while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
colon_pos--;
str_truncate(ctx->name, 0);
/* use buffer_append() so the name won't be truncated if there
are NULs. */
buffer_append(ctx->name, msg, colon_pos);
str_append_c(ctx->name, '\0');
/* keep middle stored also in ctx->name so it's available
with use_full_value */
line->middle = msg + colon_pos;
line->middle_len = (size_t)(line->value - line->middle);
str_append_n(ctx->name, line->middle, line->middle_len);
line->name = str_c(ctx->name);
line->name_len = colon_pos;
line->middle = str_data(ctx->name) + line->name_len + 1;
}
if (!line->continued) {
/* first header line. make a copy of the line since we can't
really trust input stream not to lose it. */
buffer_append(ctx->value_buf, line->value, line->value_len);
line->value = line->full_value = ctx->value_buf->data;
line->full_value_len = line->value_len;
} else if (line->use_full_value) {
/* continue saving the full value. */
if (last_no_newline) {
/* line is longer than fit into our buffer, so we
were forced to break it into multiple
message_header_lines */
} else {
if (last_crlf)
buffer_append_c(ctx->value_buf, '\r');
buffer_append_c(ctx->value_buf, '\n');
}
if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) &&
line->value_len > 0 && line->value[0] != ' ' &&
IS_LWSP(line->value[0])) {
buffer_append_c(ctx->value_buf, ' ');
buffer_append(ctx->value_buf,
line->value + 1, line->value_len - 1);
} else {
buffer_append(ctx->value_buf,
line->value, line->value_len);
}
line->full_value = buffer_get_data(ctx->value_buf,
&line->full_value_len);
} else {
/* we didn't want full_value, and this is a continued line. */
line->full_value = NULL;
line->full_value_len = 0;
}
/* always reset it */
line->use_full_value = FALSE;
if (ctx->hdr_size != NULL) {
ctx->hdr_size->physical_size += ctx->skip;
ctx->hdr_size->virtual_size += ctx->skip;
}
*hdr_r = line;
return 1;
}
/* Returns TRUE if the parser has seen a NUL byte in the headers so far. */
bool message_parse_header_has_nuls(const struct message_header_parser_ctx *ctx)
{
	return ctx->has_nuls != 0;
}
/* NOTE(review): the #undef suggests the header wraps this function in a
   macro of the same name - confirm against message-header-parser.h */
#undef message_parse_header
/* Parse all headers from input, calling callback(hdr, context) for each
   parsed line. After parsing has finished and the parser has been
   deinitialized, the callback is called one more time with hdr=NULL.

   message_parse_header_next() must never return 0 here (asserted), i.e.
   the parser must not have to wait for more data from the input. */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
			  enum message_header_parser_flags flags,
			  message_header_callback_t *callback, void *context)
{
	struct message_header_parser_ctx *parser;
	struct message_header_line *hdr;
	int ret;

	parser = message_parse_header_init(input, hdr_size, flags);
	for (;;) {
		ret = message_parse_header_next(parser, &hdr);
		if (ret <= 0)
			break;
		callback(hdr, context);
	}
	i_assert(ret != 0);
	message_parse_header_deinit(&parser);

	/* call after the final skipping */
	callback(NULL, context);
}