message-parser.c revision f755df78a17a7c003a182594ab78b0325d929ad4
a8c5a86d183db25a57bf193c06b41e092ec2e151Timo Sirainen/* Copyright (C) 2002 Timo Sirainen */
8cb72c59d5ea4e9e5f638d7ec840bb853f5a188eTimo Sirainen unsigned int len;
8cb72c59d5ea4e9e5f638d7ec840bb853f5a188eTimo Sirainentypedef struct {
e2ce8d4a6ac5d82a906178148453e7613fab9ba0Timo Sirainenstatic MessagePart *message_parse_part(IOBuffer *inbuf,
e2ce8d4a6ac5d82a906178148453e7613fab9ba0Timo Sirainenstatic MessagePart *message_parse_body(IOBuffer *inbuf,
cd56a23e21f1df3f79648cf07e2f4385e2fadebbTimo Sirainenstatic MessagePart *message_skip_boundary(IOBuffer *inbuf,
cd56a23e21f1df3f79648cf07e2f4385e2fadebbTimo Sirainenstatic void message_size_add_part(MessageSize *dest, MessagePart *part)
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen dest->lines += part->header_size.lines + part->body_size.lines;
e0c3d5460d1cc0c440cb7723c8c2eef8d0afe9b9Timo Sirainenstatic MessagePart *message_part_append(Pool pool, MessagePart *parent)
1a0ece3e873e3864269ed7eaed957dc10c56d25fTimo Sirainenstatic void parse_content_type(const Rfc822Token *tokens, int count,
5ac0b0bf32898c63da086ae169674ecac151a31eTimo Sirainen const char *str;
1a0ece3e873e3864269ed7eaed957dc10c56d25fTimo Sirainen parse_ctx->last_content_type = p_strdup(parse_ctx->pool, str);
1a0ece3e873e3864269ed7eaed957dc10c56d25fTimo Sirainen parse_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
1a0ece3e873e3864269ed7eaed957dc10c56d25fTimo Sirainen parse_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
1a0ece3e873e3864269ed7eaed957dc10c56d25fTimo Sirainen else if (strncasecmp(str, "multipart/", 10) == 0) {
1a0ece3e873e3864269ed7eaed957dc10c56d25fTimo Sirainen parse_ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainenstatic void parse_content_type_param(const Rfc822Token *name,
862ec874f9373e3e499e237d3b9f71fdf1413feeTimo Sirainen const char *str;
862ec874f9373e3e499e237d3b9f71fdf1413feeTimo Sirainen if ((parse_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
862ec874f9373e3e499e237d3b9f71fdf1413feeTimo Sirainen name->len != 8 || strncasecmp(name->ptr, "boundary", 8) != 0)
862ec874f9373e3e499e237d3b9f71fdf1413feeTimo Sirainen str = rfc822_tokens_get_value(value, value_count);
862ec874f9373e3e499e237d3b9f71fdf1413feeTimo Sirainen parse_ctx->last_boundary = p_strdup(parse_ctx->pool, str);
862ec874f9373e3e499e237d3b9f71fdf1413feeTimo Sirainenstatic void parse_header_field(MessagePart *part,
7662010b03ffe5f2a6ecf4b4eb220d1c65efea76Timo Sirainen /* call the user-defined header parser */
7662010b03ffe5f2a6ecf4b4eb220d1c65efea76Timo Sirainen parse_ctx->func(part, name, name_len, value, value_len,
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) {
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen /* we need to know the boundary */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen (void)message_content_parse_header(t_strndup(value, value_len),
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainenstatic MessagePart *message_parse_multipart(IOBuffer *inbuf,
4334b9b032298defd4d3906f5357698ff016ead0Timo Sirainen /* multipart message. add new boundary */
4334b9b032298defd4d3906f5357698ff016ead0Timo Sirainen /* reset fields */
0892446b45c195461bb7be6599f02d97e1e2c9b2Timo Sirainen /* skip the data before the first boundary */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen next_part = message_skip_boundary(inbuf, parse_ctx->boundaries,
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* now, parse the parts */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* new child */
9e095dd6a77097356aca8216356d4d71ef1bea45Timo Sirainen part = message_part_append(parse_ctx->pool, parent_part);
4334b9b032298defd4d3906f5357698ff016ead0Timo Sirainen /* set child position */
0a0cd45a633112a2ae6aad801c1e6afe53ab95deTimo Sirainen next_part = message_parse_part(inbuf, parse_ctx);
0a0cd45a633112a2ae6aad801c1e6afe53ab95deTimo Sirainen /* update our size */
0a0cd45a633112a2ae6aad801c1e6afe53ab95deTimo Sirainen message_size_add_part(&parent_part->body_size, part);
d66ef20c30fee728899ee168c75fcc5ff8fbdac1Timo Sirainen /* skip the boundary */
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen next_part = message_skip_boundary(inbuf, parse_ctx->boundaries,
0a0cd45a633112a2ae6aad801c1e6afe53ab95deTimo Sirainen /* remove boundary */
2524ef7b34965a1b1895d6140fd8296bf57c78d2Timo Sirainenstatic MessagePart *message_parse_part(IOBuffer *inbuf,
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen /* update message position/size */
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen hdr_size = parse_ctx->part->header_size.physical_size;
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen return message_parse_multipart(inbuf, parse_ctx);
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen /* when there's no content-type specified and we're
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen below multipart/digest, the assume message/rfc822
66dc739bb67d678770e1b7a7bc75f4f6f9523d2aTimo Sirainen content-type */
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen /* otherwise we default to text/plain */
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen parse_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen if (parse_ctx->part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen /* message/rfc822 part - the message body begins with
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen headers again, this works pretty much the same as
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen a single multipart/mixed item */
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen part = message_part_append(parse_ctx->pool, parse_ctx->part);
61e6367a259e2473f33df42fda8ceeb3b8b48416Timo Sirainen next_part = message_parse_part(inbuf, parse_ctx);
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen /* our body size is the size of header+body in message/rfc822 */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen message_size_add_part(&part->parent->body_size, part);
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* normal message, read until the next boundary */
20344c0e814139e3c365fbb9287478f91512089eTimo Sirainen next_part = message_parse_body(inbuf, parse_ctx->boundaries,
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo SirainenMessagePart *message_parse(Pool pool, IOBuffer *inbuf,
602a0434db30d8e3292d1c161a803d96a879a74fTimo Sirainen parse_ctx.part = part = p_new(pool, MessagePart, 1);
4334b9b032298defd4d3906f5357698ff016ead0Timo Sirainen/* skip over to next line increasing message size */
4334b9b032298defd4d3906f5357698ff016ead0Timo Sirainenstatic void message_skip_line(IOBuffer *inbuf, MessageSize *msg_size)
e3fc1874694a8ddba9552ec23f9952f74f33d1d5Timo Sirainen unsigned char *msg;
c0435c854a0e7246373b9752d163095cc4fbe985Timo Sirainen while (io_buffer_read_data(inbuf, &msg, &size, startpos) >= 0) {
602a0434db30d8e3292d1c161a803d96a879a74fTimo Sirainen /* leave the last character, it may be \r */
07e4875d250e7a7157cd99132aafc773cf3cdf83Timo Sirainenvoid message_parse_header(MessagePart *part, IOBuffer *inbuf,
07e4875d250e7a7157cd99132aafc773cf3cdf83Timo Sirainen unsigned char *msg;
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen unsigned int i, size, startpos, missing_cr_count;
07e4875d250e7a7157cd99132aafc773cf3cdf83Timo Sirainen unsigned int line_start, colon_pos, end_pos, name_len, value_len;
01f4ee4a0243f3fe9af763e1a540cd5cff0d63f5Timo Sirainen while ((ret = io_buffer_read_data(inbuf, &msg,
dd62b77c932d1b518f2a3e4bf80e36542becc256Timo Sirainen /* overflow, line is too long. just skip it. */
4b9f99761df5014c659cd87fddaf6854af428cfcTimo Sirainen /* no, we never want empty buffer */
7e1f68ad71d3485f1882142837b01f7a98ca8467Timo Sirainen /* don't parse the last character, so we can always have
7e1f68ad71d3485f1882142837b01f7a98ca8467Timo Sirainen one character read-ahead. we never care about the last
a3c197999dfe2b0c8ea38cb77cfa5e95026005c0Timo Sirainen character anyway, it's either the first character in
a3c197999dfe2b0c8ea38cb77cfa5e95026005c0Timo Sirainen message body, or if there's no body for any reason, it's
4106a25399703eb6cbb166dcbd5bb932cb2f7ad2Timo Sirainen the \n ending the header. */
6f08b98ac63c25b747120d0c8f8e319b4e26ab0fTimo Sirainen /* missing CR */
6f08b98ac63c25b747120d0c8f8e319b4e26ab0fTimo Sirainen /* no headers at all */
e0c3d5460d1cc0c440cb7723c8c2eef8d0afe9b9Timo Sirainen (i > 1 && msg[i-2] == '\n' && msg[i-1] == '\r')) {
e0c3d5460d1cc0c440cb7723c8c2eef8d0afe9b9Timo Sirainen /* \n\n or \n\r\n - end of headers */
e0c3d5460d1cc0c440cb7723c8c2eef8d0afe9b9Timo Sirainen /* make sure the header doesn't continue to next line */
e0c3d5460d1cc0c440cb7723c8c2eef8d0afe9b9Timo Sirainen /* we have a valid header line */
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainen /* get length of name-field */
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainen /* get length of value field. skip
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainen only the initial LWSP after ':'.
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainen some fields may want to keep
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainen the extra spaces.. */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* and finally call the function */
b7651d283ca261015ef3c445f1f27f340f0864e2Timo Sirainen /* end of header */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* leave the last line to buffer */
f20e7fbdc9bdbe8fecb9c661c9b8175f3bb78c69Timo Sirainen i_assert(hdr_size->virtual_size >= hdr_size->physical_size);
51e1a1c280ccb461a15827f7987d09cb9708b6e3Timo Sirainenstatic MessageBoundary *boundary_find(MessageBoundary *boundaries,
89e195dfb5c4b0efd9b9f459771a4467674e5b1fTimo Sirainen strncmp(boundaries->boundary, msg, boundaries->len) == 0)
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen/* read until next boundary is found. if skip_over = FALSE, stop at the
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen [\r]\n before the boundary, otherwise leave it right after the known
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen boundary so the ending "--" can be checked. */
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainenmessage_find_boundary(IOBuffer *inbuf, MessageBoundary *boundaries,
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen unsigned char *msg;
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen unsigned int i, size, startpos, line_start, missing_cr_count;
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen while (io_buffer_read_data(inbuf, &msg, &size, startpos) >= 0) {
68a4946b12583b88fa802e52ebee45cd96056772Timo Sirainen if (i > line_start+2 && msg[line_start] == '-' &&
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* possible boundary */
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen /* missing CR */
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen /* boundary found */
4106a25399703eb6cbb166dcbd5bb932cb2f7ad2Timo Sirainen msg[line_start] == '-' && msg[line_start+1] == '-') {
4106a25399703eb6cbb166dcbd5bb932cb2f7ad2Timo Sirainen /* long partial line, see if it's a boundary.
4106a25399703eb6cbb166dcbd5bb932cb2f7ad2Timo Sirainen RFC-2046 says that the boundaries must be
4106a25399703eb6cbb166dcbd5bb932cb2f7ad2Timo Sirainen 70 chars without "--" or less. We allow
4106a25399703eb6cbb166dcbd5bb932cb2f7ad2Timo Sirainen a bit larger.. */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* nope, we can skip over the line, just
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen leave the last char since it may be \r */
f5982bb5b0a704e88fa2b44b0b74e365d13103b9Timo Sirainen /* leave the last line to buffer, it may be
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen /* leave the pointer right after the boundary */
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen } else if (line_start > 0 && msg[line_start-1] == '\n') {
c06f4017027263cf3a08becc551f5126409e2a83Timo Sirainen /* leave the \r\n before the boundary */
6ef7e31619edfaa17ed044b45861d106a86191efTimo Sirainen if (line_start > 0 && msg[line_start-1] == '\r')
0b47e9f5e0181053b4d9ca7b426b0e5c185e820eTimo Sirainen msg_size->virtual_size += startpos + missing_cr_count;
ecc81625167ed96c04c02aa190a1ea5baa65b474Timo Sirainen i_assert(msg_size->virtual_size >= msg_size->physical_size);
90b8f131849540fa374aede95edd86d47d35c09dTimo Sirainenstatic MessagePart *message_parse_body(IOBuffer *inbuf,
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen message_get_body_size(inbuf, body_size, (uoff_t)-1);
c09f9f95db314e7482c95e502e1c56ed6c555797Timo Sirainen boundary = message_find_boundary(inbuf, boundaries,
90b8f131849540fa374aede95edd86d47d35c09dTimo Sirainen return boundary == NULL ? NULL : boundary->part;
252db51b6c0a605163326b3ea5d09e9936ca3b29Timo Sirainen/* skip data until next boundary is found. if it's end boundary,
904f9d5654b9c39edcdf32883e5e88771faf4d69Timo Sirainen skip the footer as well. */
904f9d5654b9c39edcdf32883e5e88771faf4d69Timo Sirainenstatic MessagePart *message_skip_boundary(IOBuffer *inbuf,
904f9d5654b9c39edcdf32883e5e88771faf4d69Timo Sirainen unsigned char *msg;
904f9d5654b9c39edcdf32883e5e88771faf4d69Timo Sirainen unsigned int size;
7ef5ca6fb59a318c821a852ae48a2edbb671d7ddTimo Sirainen boundary = message_find_boundary(inbuf, boundaries,
7ef5ca6fb59a318c821a852ae48a2edbb671d7ddTimo Sirainen /* now, see if it's end boundary */
7662010b03ffe5f2a6ecf4b4eb220d1c65efea76Timo Sirainen while (io_buffer_read_data(inbuf, &msg, &size, 1) >= 0) {
fe363b433b8038a69b55169da9dca27892ad7d18Timo Sirainen end_boundary = msg[0] == '-' && msg[1] == '-';
7ef5ca6fb59a318c821a852ae48a2edbb671d7ddTimo Sirainen /* skip the rest of the line */
7d5d50dd9a8c2539d7025a69e39d34fca56daeafTimo Sirainen /* skip the footer */
7d5d50dd9a8c2539d7025a69e39d34fca56daeafTimo Sirainen return message_parse_body(inbuf, boundaries, boundary_size);
7ef5ca6fb59a318c821a852ae48a2edbb671d7ddTimo Sirainen return boundary == NULL ? NULL : boundary->part;