rfc822-parser.c revision f4b1d7e52e983ba3063584c8b0ef577d6946331c
163N/A/* Copyright (c) 2005-2008 Dovecot authors, see the included COPYING file */
163N/A
163N/A#include "lib.h"
163N/A#include "str.h"
163N/A#include "strescape.h"
163N/A#include "rfc822-parser.h"
163N/A
/*
   atext           =       ALPHA / DIGIT / ; Any character except controls,
                           "!" / "#" /     ;  SP, and specials.
                           "$" / "%" /     ;  Used for atoms
                           "&" / "'" /
                           "*" / "+" /
                           "-" / "/" /
                           "=" / "?" /
                           "^" / "_" /
                           "`" / "{" /
                           "|" / "}" /
                           "~"

   MIME:

   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
              or tspecials>
   tspecials :=  "(" / ")" / "<" / ">" / "@" /
                 "," / ";" / ":" / "\" / <">
                 "/" / "[" / "]" / "?" / "="

   So a token is the same as a dot-atom, except that it also stops at
   '/', '?' and '='.
*/
163N/A
/* Character classification table, indexed by unsigned byte value:
     0 = not an atext character
     1 = atext punctuation
     2 = letter or digit (every byte in 128..255 is given this value too)
     4 = atext character that is also a MIME tspecial: '/', '=' and '?' */
unsigned char rfc822_atext_chars[256] = {
	/* 0-31: control characters */
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,

	0, 1, 0, 1, 1, 1, 1, 1, /* 32-39: SP ! " # $ % & ' */
	0, 0, 1, 1, 0, 1, 0, 4, /* 40-47: ( ) * + , - . / */
	2, 2, 2, 2, 2, 2, 2, 2, /* 48-55: 0..7 */
	2, 2, 0, 0, 0, 4, 0, 4, /* 56-63: 8 9 : ; < = > ? */
	0, 2, 2, 2, 2, 2, 2, 2, /* 64-71: @ A..G */
	2, 2, 2, 2, 2, 2, 2, 2, /* 72-79: H..O */
	2, 2, 2, 2, 2, 2, 2, 2, /* 80-87: P..W */
	2, 2, 2, 0, 0, 0, 1, 1, /* 88-95: X Y Z [ \ ] ^ _ */
	1, 2, 2, 2, 2, 2, 2, 2, /* 96-103: ` a..g */
	2, 2, 2, 2, 2, 2, 2, 2, /* 104-111: h..o */
	2, 2, 2, 2, 2, 2, 2, 2, /* 112-119: p..w */
	2, 2, 2, 1, 1, 1, 1, 0, /* 120-127: x y z { | } ~ DEL */

	/* 128-255: all bytes with the high bit set */
	2, 2, 2, 2, 2, 2, 2, 2, /* 128-135 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 136-143 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 144-151 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 152-159 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 160-167 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 168-175 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 176-183 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 184-191 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 192-199 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 200-207 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 208-215 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 216-223 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 224-231 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 232-239 */
	2, 2, 2, 2, 2, 2, 2, 2, /* 240-247 */
	2, 2, 2, 2, 2, 2, 2, 2  /* 248-255 */
};
163N/A
163N/Avoid rfc822_parser_init(struct rfc822_parser_context *ctx,
163N/A const unsigned char *data, size_t size,
163N/A string_t *last_comment)
163N/A{
163N/A memset(ctx, 0, sizeof(*ctx));
163N/A ctx->data = data;
163N/A ctx->end = data + size;
163N/A ctx->last_comment = last_comment;
163N/A}
4601N/A
163N/Aint rfc822_skip_comment(struct rfc822_parser_context *ctx)
4934N/A{
163N/A const unsigned char *start;
163N/A int level = 1;
163N/A
163N/A i_assert(*ctx->data == '(');
4934N/A
4601N/A if (ctx->last_comment != NULL)
163N/A str_truncate(ctx->last_comment, 0);
163N/A
163N/A start = ++ctx->data;
163N/A for (; ctx->data != ctx->end; ctx->data++) {
163N/A switch (*ctx->data) {
163N/A case '(':
163N/A level++;
4601N/A break;
4601N/A case ')':
163N/A if (--level == 0) {
163N/A if (ctx->last_comment != NULL) {
163N/A str_append_n(ctx->last_comment, start,
163N/A ctx->data - start);
211N/A }
163N/A ctx->data++;
163N/A return ctx->data != ctx->end;
163N/A }
163N/A break;
163N/A case '\\':
163N/A if (ctx->last_comment != NULL) {
163N/A str_append_n(ctx->last_comment, start,
4601N/A ctx->data - start);
4601N/A }
4601N/A start = ctx->data + 1;
4601N/A
4601N/A ctx->data++;
4601N/A if (ctx->data == ctx->end)
4601N/A return -1;
4601N/A break;
4601N/A }
4601N/A }
163N/A
163N/A /* missing ')' */
163N/A return -1;
163N/A}
163N/A
1703N/Aint rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
1703N/A{
1703N/A for (; ctx->data != ctx->end;) {
1703N/A if (*ctx->data == ' ' || *ctx->data == '\t' ||
1703N/A *ctx->data == '\r' || *ctx->data == '\n') {
1703N/A ctx->data++;
1703N/A continue;
1703N/A }
1703N/A
1703N/A if (*ctx->data != '(')
1703N/A break;
1703N/A
1703N/A if (rfc822_skip_comment(ctx) < 0)
1703N/A return -1;
4601N/A }
4601N/A return ctx->data != ctx->end;
4601N/A}
4601N/A
1703N/Aint rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
1703N/A{
1703N/A const unsigned char *start;
1703N/A
1703N/A /*
1703N/A atom = [CFWS] 1*atext [CFWS]
1703N/A atext =
1703N/A ; Any character except controls, SP, and specials.
1703N/A */
1703N/A if (ctx->data == ctx->end || !IS_ATEXT(*ctx->data))
1703N/A return -1;
1703N/A
1703N/A for (start = ctx->data++; ctx->data != ctx->end; ctx->data++) {
1703N/A if (IS_ATEXT(*ctx->data))
181N/A continue;
163N/A
163N/A str_append_n(str, start, ctx->data - start);
3996N/A return rfc822_skip_lwsp(ctx);
3996N/A }
3996N/A
3996N/A str_append_n(str, start, ctx->data - start);
3996N/A return 0;
3996N/A}
4601N/A
4601N/Aint rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
3996N/A{
4601N/A const unsigned char *start;
3996N/A int ret;
3996N/A
3996N/A /*
4601N/A dot-atom = [CFWS] dot-atom-text [CFWS]
dot-atom-text = 1*atext *("." 1*atext)
atext =
; Any character except controls, SP, and specials.
For RFC-822 compatibility allow LWSP around '.'
*/
if (ctx->data == ctx->end || !IS_ATEXT(*ctx->data))
return -1;
for (start = ctx->data++; ctx->data != ctx->end; ctx->data++) {
if (IS_ATEXT(*ctx->data))
continue;
str_append_n(str, start, ctx->data - start);
if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
return ret;
if (*ctx->data != '.')
return 1;
ctx->data++;
str_append_c(str, '.');
if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
return ret;
start = ctx->data;
}
str_append_n(str, start, ctx->data - start);
return 0;
}
/* Parse a MIME token and append it to str. The token consists of atext
   characters that are not MIME tspecials, plus '.'. It may be empty: nothing
   is required to match at the current position.

   Returns 0 if the token ran up to the end of the input, and otherwise the
   result of rfc822_skip_lwsp() on what follows the token. */
int rfc822_parse_mime_token(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const first = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT_NON_TSPECIAL(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, first, ctx->data - first);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/* Parse a quoted string and append its contents to str. ctx->data must point
   at the opening '"'.

   The surrounding quotes are not copied. A backslash makes the following
   byte literal (so an escaped '"' does not end the string), but the
   backslash itself is copied to str together with the escaped byte, i.e. the
   output is still in escaped form.

   Returns -1 if the input ends before the closing '"' (str may already hold
   a partial value), otherwise the result of rfc822_skip_lwsp() on what
   follows the closing quote. */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *seg;

	i_assert(*ctx->data == '"');
	ctx->data++;

	seg = ctx->data;
	while (ctx->data != ctx->end) {
		switch (*ctx->data) {
		case '"':
			str_append_n(str, seg, ctx->data - seg);
			ctx->data++;
			return rfc822_skip_lwsp(ctx);
		case '\\':
			/* step onto the escaped byte; everything up to and
			   including the backslash is flushed */
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			str_append_n(str, seg, ctx->data - seg);
			seg = ctx->data;
			break;
		default:
			break;
		}
		ctx->data++;
	}

	/* missing '"' */
	return -1;
}
/* Parse a run of atext characters and dots and append it to str.

     atom  = [CFWS] 1*atext [CFWS]
     atext = any character except controls, SP, and specials

   Unlike rfc822_parse_dot_atom(), whitespace inside the run is not silently
   skipped: the run ends at the first byte that is neither atext nor '.'.
   The run may be empty.

   Returns 0 if the run reached the end of the input, and otherwise the
   result of rfc822_skip_lwsp() on what follows. */
static int
rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const first = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, first, ctx->data - first);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/* Parse a phrase and append it to str, with a single space between
   consecutive words.

     phrase     = 1*word / obs-phrase
     word       = atom / quoted-string
     obs-phrase = word *(word / "." / CFWS)

   Returns 0 immediately if there is no input, and -1 if the phrase begins
   with '.'. When a word parser returns <= 0 (input ended or error), that
   value is returned. Otherwise parsing stops at the first byte that cannot
   start another word and the result of rfc822_skip_lwsp() is returned. */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data == '.')
		return -1;

	for (;;) {
		ret = *ctx->data == '"' ?
			rfc822_parse_quoted_string(ctx, str) :
			rfc822_parse_atom_or_dot(ctx, str);
		if (ret <= 0)
			return ret;

		/* ret > 0, so ctx->data points at a valid byte here */
		if (*ctx->data != '"' && *ctx->data != '.' &&
		    !IS_ATEXT(*ctx->data))
			return rfc822_skip_lwsp(ctx);

		str_append_c(str, ' ');
	}
}
/* Parse a domain literal and append it, including the enclosing brackets and
   any backslashes, to str. ctx->data must point at the opening '['.

     domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     dcontent       = dtext / quoted-pair
     dtext          = NO-WS-CTL /  ; Non white space controls
                      %d33-90 /    ; The rest of the US-ASCII
                      %d94-126     ;  characters not including "[",
                                   ;  "]", or "\"

   A backslash makes the following byte literal, so an escaped ']' does not
   end the literal. The contents are not otherwise validated.

   Returns -1 if the input ends before the closing ']' (nothing is appended
   in that case), otherwise the result of rfc822_skip_lwsp() on what follows
   the literal. */
static int
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const literal_start = ctx->data;

	i_assert(*ctx->data == '[');

	while (ctx->data != ctx->end) {
		if (*ctx->data == ']') {
			ctx->data++;
			str_append_n(str, literal_start,
				     ctx->data - literal_start);
			return rfc822_skip_lwsp(ctx);
		}
		if (*ctx->data == '\\') {
			/* step onto the escaped byte so that the increment
			   below moves past it */
			ctx->data++;
			if (ctx->data == ctx->end)
				break;
		}
		ctx->data++;
	}

	/* missing ']' */
	return -1;
}
/* Parse the domain that follows an '@' and append it to str. ctx->data must
   point at the '@'.

     domain         = dot-atom / domain-literal / obs-domain
     domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     obs-domain     = atom *("." atom)

   Returns -1 if nothing but whitespace/comments follows the '@' or a comment
   is broken. Otherwise returns whatever the domain-literal parser (when the
   domain starts with '[') or the dot-atom parser returns. */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
	i_assert(*ctx->data == '@');
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	return *ctx->data == '[' ?
		rfc822_parse_domain_literal(ctx, str) :
		rfc822_parse_dot_atom(ctx, str);
}
/* Parse a "type/subtype" value and append it to str as "type/subtype", with
   whitespace and comments around the '/' removed.

   Returns -1 if the input ends or breaks before the subtype starts, or if
   the main type is not followed by '/'. In that case str may already contain
   the main type. Otherwise returns the result of parsing the subtype token
   with rfc822_parse_mime_token(). */
int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
{
	/* leading whitespace, then the main type */
	if (rfc822_skip_lwsp(ctx) <= 0 ||
	    rfc822_parse_mime_token(ctx, str) <= 0)
		return -1;

	/* the separator, optionally followed by whitespace */
	if (*ctx->data != '/')
		return -1;
	ctx->data++;
	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	/* the subtype */
	str_append_c(str, '/');
	return rfc822_parse_mime_token(ctx, str);
}
/* Parse the next ";key=value" parameter.

     .. := *(";" parameter)
     parameter := attribute "=" value
     attribute := token
     value     := token / quoted-string

   *key_r and *value_r are first set to NULL. On a return value of 1 they
   point into one buffer obtained from t_str_new(), which holds the key, a
   NUL byte and then the value. A quoted-string value is passed through
   str_unescape().

   Returns 0 if there is no more input. Returns -1 if the parameter does not
   start with ';', if the key is not followed by '=', if the input ends
   before the '=', or if parsing the value fails. Returns 1 otherwise; this
   includes the case where the input ends right after the '=', which gives an
   empty value. Note that when value parsing fails, *key_r and *value_r have
   already been set even though -1 is returned. */
int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
			       const char **key_r, const char **value_r)
{
	string_t *buf;
	size_t value_offset;
	int ret;

	*key_r = NULL;
	*value_r = NULL;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data != ';')
		return -1;
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	buf = t_str_new(64);
	if (rfc822_parse_mime_token(ctx, buf) <= 0)
		return -1;
	/* key and value share the buffer, separated by a NUL */
	str_append_c(buf, '\0');
	value_offset = str_len(buf);

	if (*ctx->data != '=')
		return -1;
	ctx->data++;

	ret = rfc822_skip_lwsp(ctx);
	if (ret > 0) {
		if (*ctx->data == '"') {
			ret = rfc822_parse_quoted_string(ctx, buf);
			str_unescape(str_c_modifiable(buf) + value_offset);
		} else {
			ret = rfc822_parse_mime_token(ctx, buf);
		}
	}
	/* else: broken / no value */

	/* take the pointers only after all appends are done */
	*key_r = str_c(buf);
	*value_r = *key_r + value_offset;
	return ret < 0 ? -1 : 1;
}