/* message-parser.c revision 11da9baca9749e1645f217bb64ba5c98ce74033a */
/* Copyright (C) 2002 Timo Sirainen */
#include "lib.h"
#include "buffer.h"
#include "istream.h"
#include "str.h"
#include "strescape.h"
#include "message-content-parser.h"
#include "message-parser.h"
#include "message-size.h"
/* One entry in a chain of MIME boundaries. Entries are linked through
   `next`, and the chain is walked until a NULL pointer is reached. */
struct message_boundary {
/* next entry in the chain */
struct message_boundary *next;
/* message part associated with this boundary.
   NOTE(review): presumably the multipart part that declared the
   boundary - confirm against the code that fills this in. */
struct message_part *part;
/* the boundary string itself.
   NOTE(review): whether the leading "--" is included is not visible
   from this definition - confirm. */
const char *boundary;
};
/* State carried by the message parser while it walks a message. */
struct message_parser_ctx {
/* NOTE(review): presumably the boundary value remembered from the most
   recently parsed Content-Type header - confirm. */
char *last_boundary;
/* NOTE(review): presumably the content type remembered from the most
   recently parsed Content-Type header - confirm. */
char *last_content_type;
/* head of the boundary chain; the multipart parsing code stores each
   newly added boundary here */
struct message_boundary *boundaries;
/* opaque pointer.
   NOTE(review): looks like caller-supplied data that is handed back to
   callbacks - confirm. */
void *context;
};
/* State carried by the header parser between header lines. */
struct message_header_parser_ctx {
/* the header line structure.
   NOTE(review): presumably the one filled in and handed to the caller
   for each parsed line - confirm. */
struct message_header_line line;
/* NOTE(review): presumably the header size being accumulated while
   parsing; whether it may be NULL is not visible here - confirm. */
struct message_size *hdr_size;
/* when non-zero, the value parsing takes the branch that skips the LWSP
   following the ':' of a header */
int skip_initial_lwsp;
/* NOTE(review): presumably set when a NUL byte is seen in the header
   data - confirm. */
int has_nuls;
};
static void
static struct message_part *
static struct message_part *
struct message_boundary *boundaries,
static struct message_part *
struct message_boundary *boundaries,
struct message_part *part)
{
dest->physical_size +=
dest->virtual_size +=
}
static struct message_part *
{
/* set child position */
part->physical_pos =
return part;
}
void *context)
{
const char *str;
return;
}
}
}
static void
int value_quoted, void *context)
{
return;
if (value_quoted)
}
}
static struct message_part *
{
struct message_boundary *b;
int has_nuls;
/* multipart message. add new boundary */
parser_ctx->boundaries = b;
/* reset fields */
/* skip the data before the first boundary */
if (has_nuls)
/* now, parse the parts */
while (next_part == parent_part) {
/* new child */
/* it also belongs to parent */
}
/* update our size */
if (next_part != parent_part)
break;
/* skip the boundary */
&has_nuls);
if (has_nuls)
}
/* remove boundary */
return next_part;
}
#define MUTEX_FLAGS \
{
struct message_header_parser_ctx *hdr_ctx;
struct message_header_line *hdr;
int ret;
/* call the user-defined header parser */
/* it's MIME. Content-* headers are valid */
}
continue;
}
/* we need to know the boundary */
}
}
/* It's not MIME. Reset everything we found from
Content-Type. */
}
}
static struct message_part *
{
struct message_part *next_part;
int has_nuls;
return message_parse_multipart(parser_ctx);
/* when there's no content-type specified and we're
content-type */
} else {
}
}
headers again, this works pretty much the same as
} else {
/* normal message, read until the next boundary */
if (has_nuls)
}
/* it also belongs to parent */
}
return next_part;
}
int *has_nuls)
{
const unsigned char *msg;
startpos = 0;
if (msg[i] == '\0')
else if (msg[i] == '\n') {
if (!skip_lf) {
i--;
startpos = i;
goto __break;
}
msg_size->virtual_size++;
}
startpos = i+1;
goto __break;
}
}
/* leave the last character, it may be \r */
startpos = 1;
}
}
}
}
static struct message_boundary *
{
while (boundaries != NULL) {
return boundaries;
}
return NULL;
}
/* read until next boundary is found. stops at the [\r]\n before the
boundary. */
static struct message_boundary *
struct message_boundary *boundaries,
{
struct message_boundary *boundary;
const unsigned char *msg;
if (msg[i] != '\n') {
if (msg[i] == '\0')
continue;
}
/* possible boundary */
i - line_start - 2);
break;
}
/* missing CR */
}
line_start = i+1;
}
break;
/* continued long line, continue skipping over it */
} else if (i - line_start > 128) {
/* long partial line, see if it's a boundary.
RFC-2046 says that the boundaries must be
70 chars without "--" or less. We allow
a bit larger.. */
i - line_start - 2);
break;
}
/* nope, we can skip over the line, just
leave the last char since it may be \r */
i--;
} else {
/* leave the last line to buffer, it may be
boundary */
i = line_start;
if (i > 0) i--; /* leave the \r\n too */
if (i > 0) i--;
line_start -= i;
}
i_stream_skip(input, i);
msg_size->physical_size += i;
msg_size->virtual_size += i;
}
/* possible boundary without line feed at end */
}
/* leave the \r\n before the boundary */
line_start--;
line_start--;
else
}
}
return boundary;
}
static struct message_part *
struct message_boundary *boundaries,
{
struct message_boundary *boundary;
struct message_size body_size;
if (boundaries == NULL) {
} else {
}
}
/* skip data until next boundary is found. if it's end boundary,
skip the footer as well. */
static struct message_part *
struct message_boundary *boundaries,
{
struct message_boundary *boundary;
const unsigned char *msg;
return NULL;
/* skip over to beginning of next line.
size = "\r\n" + "--" + boundary + "--" */
if (msg[0] == '\r') {
}
if (msg[0] == '\n') {
line_count++;
}
/* now, see if it's end boundary. */
/* now, the boundary we found may not be what we expected.
change boundary_size to be the found boundary's parent part */
/* skip the rest of the line. we probably have to skip two lines
because input is positioned to end of boundary's previous line */
for (i = 0; i < line_count; i++) {
!end_boundary, has_nuls);
}
if (end_boundary) {
/* skip the footer */
}
}
struct message_parser_ctx *
{
struct message_parser_ctx *ctx;
return ctx;
}
{
return parts;
}
struct message_size *hdr_size,
void *context)
{
}
void *context)
{
}
void *context)
{
/* note that we want to parse the header of all
the message parts, multiparts too. */
}
}
}
void *context)
{
}
struct message_size *hdr_size,
{
struct message_header_parser_ctx *hdr_ctx;
struct message_header_line *hdr;
int ret;
/* call after the final skipping */
}
struct message_header_parser_ctx *
int skip_initial_lwsp)
{
struct message_header_parser_ctx *ctx;
return ctx;
}
{
}
struct message_header_line **hdr_r)
{
const unsigned char *msg;
int ret, last_no_newline;
return -1;
}
/* save the first line */
else {
4096);
}
}
colon_pos = 0;
} else {
/* new header line */
}
for (;;) {
if (ret > 0) {
/* we want to know one byte in advance to find out
if it's multiline header */
} else {
parse_size = size;
}
if (ret == -1) {
if (startpos > 0) {
/* header ended unexpectedly. */
break;
}
/* error / EOF with no bytes */
return -1;
}
/* stream is nonblocking - need more data */
return 0;
}
if (msg[0] == '\n' ||
/* end of headers - this mostly happens just
with mbox where headers are read separately
from body */
size = 0;
if (msg[0] == '\r')
else {
}
break;
}
/* a) line is larger than input buffer
b) header ended unexpectedly */
/* header name is huge. just skip it. */
startpos = 0;
continue;
}
if (ret == -2) {
/* go back to last LWSP if found. */
size = i;
break;
}
}
}
break;
}
/* find ':' */
for (i = startpos; i < parse_size; i++) {
if (msg[i] <= ':') {
if (msg[i] == ':') {
colon_pos = i;
i + 1;
break;
}
if (msg[i] == '\n') {
/* end of headers, or error */
break;
}
if (msg[i] == '\0')
}
}
}
/* find '\n' */
for (i = startpos; i < parse_size; i++) {
if (msg[i] <= '\n') {
if (msg[i] == '\n')
break;
if (msg[i] == '\0')
}
}
if (i < parse_size) {
/* got a line */
/* missing CR */
size = i;
} else {
size = i-1;
}
break;
}
startpos = i;
}
if (size == 0) {
/* end of headers */
line->middle_len = 0;
/* missing ':', assume the whole line is name */
line->middle_len = 0;
} else {
if (ctx->skip_initial_lwsp) {
/* get value. skip all LWSP after ':'. Note that
RFC2822 doesn't say we should, but history behind
it..
Exception to this is if the value consists only of
LWSP, then skip only the one LWSP after ':'. */
break;
}
/* everything was LWSP */
pos = 1;
}
} else {
}
/* get name, skip LWSP before ':' */
colon_pos--;
}
/* first header line, set full_value = value */
} else if (line->use_full_value) {
/* continue saving the full value */
if (!last_no_newline)
&line->full_value_len);
} else {
/* we didn't want full_value, and this is a continued line. */
line->full_value_len = 0;
}
/* always reset it */
}
return 1;
}