/* message-header-parser.c revision 01ca85e6e763cfa0d146166fdd60654f04f51199 */
/* Copyright (c) 2002-2012 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "istream.h"
#include "str.h"
#include "message-size.h"
#include "message-header-parser.h"

a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainenstruct message_header_parser_ctx {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen struct message_header_line line;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen struct istream *input;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen struct message_size *hdr_size;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen string_t *name;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen buffer_t *value_buf;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen size_t skip;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen enum message_header_parser_flags flags;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen unsigned int skip_line:1;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen unsigned int has_nuls:1;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen};
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainenstruct message_header_parser_ctx *
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainenmessage_parse_header_init(struct istream *input, struct message_size *hdr_size,
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen enum message_header_parser_flags flags)
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen{
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen struct message_header_parser_ctx *ctx;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx = i_new(struct message_header_parser_ctx, 1);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->input = input;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->hdr_size = hdr_size;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->name = str_new(default_pool, 128);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->flags = flags;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->value_buf = buffer_create_dynamic(default_pool, 4096);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (hdr_size != NULL)
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen memset(hdr_size, 0, sizeof(*hdr_size));
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen return ctx;
c14ceb2cabfd4934243d09ff2dfbc24791ef4eb2Timo Sirainen}
c14ceb2cabfd4934243d09ff2dfbc24791ef4eb2Timo Sirainen
c14ceb2cabfd4934243d09ff2dfbc24791ef4eb2Timo Sirainenvoid message_parse_header_deinit(struct message_header_parser_ctx **_ctx)
c14ceb2cabfd4934243d09ff2dfbc24791ef4eb2Timo Sirainen{
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen struct message_header_parser_ctx *ctx = *_ctx;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen i_stream_skip(ctx->input, ctx->skip);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen buffer_free(&ctx->value_buf);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen str_free(&ctx->name);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen i_free(ctx);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen *_ctx = NULL;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen}
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainenint message_parse_header_next(struct message_header_parser_ctx *ctx,
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen struct message_header_line **hdr_r)
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen{
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen struct message_header_line *line = &ctx->line;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen const unsigned char *msg;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen size_t i, size, startpos, colon_pos, parse_size;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen int ret;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen bool continued, continues, last_no_newline, last_crlf;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen bool no_newline, crlf_newline;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen *hdr_r = NULL;
11352dc3e4b29f3d2763c82f8ea4f99e8daf4fa3Timo Sirainen if (line->eoh)
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen return -1;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (ctx->skip > 0) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen i_stream_skip(ctx->input, ctx->skip);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->skip = 0;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (line->continues)
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen colon_pos = 0;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen else {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* new header line */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen line->name_offset = ctx->input->v_offset;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen colon_pos = UINT_MAX;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen buffer_set_used_size(ctx->value_buf, 0);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen no_newline = FALSE;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen crlf_newline = FALSE;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen continued = line->continues;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen continues = FALSE;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen for (startpos = 0;;) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ret = i_stream_read_data(ctx->input, &msg, &size, startpos+1);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (ret >= 0) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* we want to know one byte in advance to find out
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if it's multiline header */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen parse_size = size == 0 ? 0 : size-1;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen } else {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen parse_size = size;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (ret <= 0 && startpos == parse_size) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (ret == -1) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (startpos > 0) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* header ended unexpectedly. */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen no_newline = TRUE;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen ctx->skip = startpos;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen break;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* error / EOF with no bytes */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen return -1;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (size > 0 && !ctx->skip_line && !continued &&
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen (msg[0] == '\n' ||
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen (msg[0] == '\r' && size > 1 && msg[1] == '\n'))) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* end of headers - this mostly happens just
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen with mbox where headers are read separately
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen from body */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen size = 0;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (ctx->hdr_size != NULL)
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->lines++;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (msg[0] == '\r') {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->skip = 2;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen crlf_newline = TRUE;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen } else {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->skip = 1;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ctx->hdr_size != NULL)
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->virtual_size++;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen break;
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen }
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen if (ret == 0 && !ctx->input->eof) {
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen /* stream is nonblocking - need more data */
636f017be100bce67d66fd3ae1544a47681efd33Timo Sirainen return 0;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen i_assert(size > 0);
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* a) line is larger than input buffer
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen b) header ended unexpectedly */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ret == -2) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* go back to last LWSP if found. */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen size_t min_pos = !continued ? colon_pos : 0;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen for (i = size-1; i > min_pos; i--) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (IS_LWSP(msg[i])) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen size = i;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen break;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (i == min_pos && (msg[size-1] == '\r' ||
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen msg[size-1] == '\n')) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* we may or may not have a full header,
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen but we don't know until we get the
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen next character. leave out the
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen linefeed and finish the header on
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen the next run. */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen size--;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (size > 0 && msg[size-1] == '\r')
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen size--;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* the buffer really has to be more than 2 to
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen avoid CRLF looping forever */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen i_assert(size > 0);
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen continues = TRUE;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen no_newline = TRUE;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->skip = size;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen break;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen /* find ':' */
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (colon_pos == UINT_MAX) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen for (i = startpos; i < parse_size; i++) {
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen if (msg[i] > ':')
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen continue;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen if (msg[i] == ':' && !ctx->skip_line) {
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen colon_pos = i;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen line->full_value_offset =
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->input->v_offset + i + 1;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen break;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (msg[i] == '\n') {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* end of headers, or error */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen break;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (msg[i] == '\0')
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->has_nuls = TRUE;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen } else {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen i = startpos;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* find '\n' */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen for (; i < parse_size; i++) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (msg[i] <= '\n') {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (msg[i] == '\n')
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen break;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (msg[i] == '\0')
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->has_nuls = TRUE;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (i < parse_size && i+1 == size && ret == -2) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* we don't know if the line continues. */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen i++;
c0a87e5f3316a57e6f915882fa1951d0fbb74a61Timo Sirainen } else if (i < parse_size) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* got a line */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ctx->skip_line) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* skipping a line with a huge header name */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ctx->hdr_size != NULL) {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->lines++;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->physical_size += i + 1;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->virtual_size += i + 1;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (i == 0 || msg[i-1] != '\r') {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* missing CR */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ctx->hdr_size != NULL)
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->virtual_size++;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen i_stream_skip(ctx->input, i + 1);
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen startpos = 0;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->skip_line = FALSE;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen continue;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen continues = i+1 < size && IS_LWSP(msg[i+1]);
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ctx->hdr_size != NULL)
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->lines++;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (i == 0 || msg[i-1] != '\r') {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen /* missing CR */
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen if (ctx->hdr_size != NULL)
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->hdr_size->virtual_size++;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen size = i;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen } else {
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen size = i-1;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen crlf_newline = TRUE;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen ctx->skip = i+1;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen break;
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen }
9bd607718368ffb39bcfbc82010073364901c5a2Timo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen startpos = i;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen }
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen last_crlf = line->crlf_newline &&
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_DROP_CR) == 0;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen last_no_newline = line->no_newline ||
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen line->continues = continues;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen line->continued = continued;
a6e79dffa06db28bcfad9c1e5fc819c48172d5deTimo Sirainen line->crlf_newline = crlf_newline;
line->no_newline = no_newline;
if (size == 0 && !continued) {
/* end of headers */
line->eoh = TRUE;
line->name_len = line->value_len = line->full_value_len = 0;
line->name = ""; line->value = line->full_value = NULL;
line->middle = NULL; line->middle_len = 0;
line->full_value_offset = line->name_offset;
line->continues = FALSE;
} else if (line->continued) {
line->value = msg;
line->value_len = size;
} else if (colon_pos == UINT_MAX) {
/* missing ':', assume the whole line is name */
line->value = NULL;
line->value_len = 0;
str_truncate(ctx->name, 0);
buffer_append(ctx->name, msg, size);
line->name = str_c(ctx->name);
line->name_len = str_len(ctx->name);
line->middle = NULL;
line->middle_len = 0;
} else {
size_t pos;
line->value = msg + colon_pos+1;
line->value_len = size - colon_pos - 1;
if (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP) {
/* get value. skip all LWSP after ':'. Note that
RFC2822 doesn't say we should, but history behind
it..
Exception to this is if the value consists only of
LWSP, then skip only the one LWSP after ':'. */
for (pos = 0; pos < line->value_len; pos++) {
if (!IS_LWSP(line->value[pos]))
break;
}
if (pos == line->value_len) {
/* everything was LWSP */
if (line->value_len > 0 &&
IS_LWSP(line->value[0]))
pos = 1;
}
} else {
pos = line->value_len > 0 &&
IS_LWSP(line->value[0]) ? 1 : 0;
}
line->value += pos;
line->value_len -= pos;
line->full_value_offset += pos;
/* get name, skip LWSP before ':' */
while (colon_pos > 0 && IS_LWSP(msg[colon_pos-1]))
colon_pos--;
str_truncate(ctx->name, 0);
/* use buffer_append() so the name won't be truncated if there
are NULs. */
buffer_append(ctx->name, msg, colon_pos);
str_append_c(ctx->name, '\0');
/* keep middle stored also in ctx->name so it's available
with use_full_value */
line->middle = msg + colon_pos;
line->middle_len = (size_t)(line->value - line->middle);
str_append_n(ctx->name, line->middle, line->middle_len);
line->name = str_c(ctx->name);
line->name_len = colon_pos;
line->middle = str_data(ctx->name) + line->name_len + 1;
}
if (!line->continued) {
/* first header line. make a copy of the line since we can't
really trust input stream not to lose it. */
buffer_append(ctx->value_buf, line->value, line->value_len);
line->value = line->full_value = ctx->value_buf->data;
line->full_value_len = line->value_len;
} else if (line->use_full_value) {
/* continue saving the full value. */
if (last_no_newline) {
/* line is longer than fit into our buffer, so we
were forced to break it into multiple
message_header_lines */
} else {
if (last_crlf)
buffer_append_c(ctx->value_buf, '\r');
buffer_append_c(ctx->value_buf, '\n');
}
if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) &&
line->value_len > 0 && line->value[0] != ' ' &&
IS_LWSP(line->value[0])) {
buffer_append_c(ctx->value_buf, ' ');
buffer_append(ctx->value_buf,
line->value + 1, line->value_len - 1);
} else {
buffer_append(ctx->value_buf,
line->value, line->value_len);
}
line->full_value = buffer_get_data(ctx->value_buf,
&line->full_value_len);
} else {
/* we didn't want full_value, and this is a continued line. */
line->full_value = NULL;
line->full_value_len = 0;
}
/* always reset it */
line->use_full_value = FALSE;
if (ctx->hdr_size != NULL) {
ctx->hdr_size->physical_size += ctx->skip;
ctx->hdr_size->virtual_size += ctx->skip;
}
*hdr_r = line;
return 1;
}
/* Returns TRUE if the parser has flagged a NUL byte while scanning the
   header data read so far. */
bool message_parse_header_has_nuls(const struct message_header_parser_ctx *ctx)
{
	return ctx->has_nuls != 0;
}
#undef message_parse_header
/* Parse all headers from input, invoking callback once per parsed line and
   one final time with a NULL line after the parser has been destroyed.
   hdr_size and flags are passed through to message_parse_header_init().
   The parser must never ask for more data here (asserted). */
void message_parse_header(struct istream *input, struct message_size *hdr_size,
			  enum message_header_parser_flags flags,
			  message_header_callback_t *callback, void *context)
{
	struct message_header_parser_ctx *parser =
		message_parse_header_init(input, hdr_size, flags);
	struct message_header_line *cur_line;
	int ret;

	for (;;) {
		ret = message_parse_header_next(parser, &cur_line);
		if (ret <= 0)
			break;
		callback(cur_line, context);
	}
	/* 0 means "need more data", which isn't handled by this function */
	i_assert(ret != 0);

	message_parse_header_deinit(&parser);

	/* call after the final skipping */
	callback(NULL, context);
}