/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "str.h"
#include "strescape.h"
#include "rfc822-parser.h"
/*
atext = ALPHA / DIGIT / ; Any character except controls,
"!" / "#" / ; SP, and specials.
"$" / "%" / ; Used for atoms
"&" / "'" /
"*" / "+" /
"-" / "/" /
"=" / "?" /
"^" / "_" /
"`" / "{" /
"|" / "}" /
"~"
MIME:
token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
or tspecials>
tspecials := "(" / ")" / "<" / ">" / "@" /
"," / ";" / ":" / "\" / <">
"/" / "[" / "]" / "?" / "="
So a token is the same as a dot-atom, except that it also stops at '/', '?' and '='.
*/
/* Per-byte classification table, indexed by the byte value:
     0 = not an atext character
     1 = atext punctuation
     2 = alpha or digit
     4 = atext character that is also a MIME tspecial ('/', '=' and '?')
   Every byte in 128-255 is marked with 2, i.e. 8bit bytes are classified
   the same way as alphanumerics.
   NOTE(review): the IS_ATEXT() and IS_ATEXT_NON_TSPECIAL() macros used in
   this file are presumably built on this table; they are defined outside
   this file - confirm the exact bit tests there before changing any value. */
unsigned char rfc822_atext_chars[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4, /* 32-47 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4, /* 48-63 */
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 128-143 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 144-159 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 160-175 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 176-191 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 192-207 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 208-223 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 224-239 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 /* 240-255 */
};
/* Prepare ctx for parsing the `size` bytes that start at `data`.

   ctx is first cleared with i_zero(). After that the parse position is set
   to the first input byte and the end marker to one past the last input
   byte. last_comment is stored in the context as given; when it is non-NULL,
   rfc822_skip_comment() writes the contents of the comment it skips into
   it. */
void rfc822_parser_init(struct rfc822_parser_context *ctx,
			const unsigned char *data, size_t size,
			string_t *last_comment)
{
	const unsigned char *const input_end = data + size;

	i_zero(ctx);
	ctx->last_comment = last_comment;
	ctx->end = input_end;
	ctx->data = data;
}
/* Skip over a (possibly nested) comment.

   On entry ctx->data must point at the opening '(' and at least one byte of
   input must be left; both are asserted. If ctx->last_comment is non-NULL it
   is truncated first and then receives the comment's contents: the outermost
   parentheses are dropped, the backslash of each quoted-pair is dropped
   (the escaped byte itself is kept) and nested parentheses are kept as-is.

   Returns 1 if input remains after the closing ')', 0 if the closing ')' was
   the last byte of the input, and -1 if the input ended before the comment
   was closed (which includes ending right after a backslash). On -1,
   ctx->last_comment may contain only part of the comment. */
int rfc822_skip_comment(struct rfc822_parser_context *ctx)
{
	const unsigned char *start;
	int level = 1;

	/* Check the bounds before dereferencing, so that calling this with an
	   exhausted parser is caught instead of reading past the buffer. */
	i_assert(ctx->data < ctx->end);
	i_assert(*ctx->data == '(');

	if (ctx->last_comment != NULL)
		str_truncate(ctx->last_comment, 0);

	/* start marks the beginning of the not yet copied comment text */
	start = ++ctx->data;
	for (; ctx->data != ctx->end; ctx->data++) {
		switch (*ctx->data) {
		case '(':
			level++;
			break;
		case ')':
			if (--level == 0) {
				/* outermost comment closed - flush the rest */
				if (ctx->last_comment != NULL) {
					str_append_n(ctx->last_comment, start,
						     ctx->data - start);
				}
				ctx->data++;
				return ctx->data != ctx->end ? 1 : 0;
			}
			break;
		case '\\':
			/* quoted-pair: flush the text before the backslash
			   and continue copying from the escaped byte */
			if (ctx->last_comment != NULL) {
				str_append_n(ctx->last_comment, start,
					     ctx->data - start);
			}
			start = ctx->data + 1;

			/* step onto the escaped byte; the loop increment then
			   skips it without interpreting it */
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			break;
		default:
			break;
		}
	}

	/* missing ')' */
	return -1;
}
/* Advance ctx->data past any run of linear whitespace (space, TAB, CR, LF)
   and comments.

   Returns 1 when the parser stopped at a byte that is neither whitespace nor
   the start of a comment, 0 when the end of input was reached, and -1 when
   rfc822_skip_comment() failed on a comment. */
int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
{
	while (ctx->data != ctx->end) {
		switch (*ctx->data) {
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			ctx->data++;
			break;
		case '(':
			if (rfc822_skip_comment(ctx) < 0)
				return -1;
			break;
		default:
			/* something else than LWSP or a comment follows */
			return 1;
		}
	}
	/* consumed everything */
	return 0;
}
/* Parse one atom and append it to str.

     atom  = [CFWS] 1*atext [CFWS]
     atext = ; Any character except controls, SP, and specials.

   Leading CFWS is not skipped here: the parser must already be positioned on
   an atext byte, otherwise -1 is returned and nothing is appended.

   Returns 0 if the atom extends to the end of the input; otherwise the
   result of rfc822_skip_lwsp() for whatever follows the atom. */
int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const atom_begin = ctx->data;

	if (atom_begin == ctx->end || !IS_ATEXT(*atom_begin))
		return -1;

	/* the first byte is known to be atext, find where the run ends */
	ctx->data++;
	while (ctx->data != ctx->end && IS_ATEXT(*ctx->data))
		ctx->data++;

	str_append_n(str, atom_begin, ctx->data - atom_begin);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/* Parse a dot-atom and append it to str.

     dot-atom        = [CFWS] dot-atom-text [CFWS]
     dot-atom-text   = 1*atext *("." 1*atext)
     atext           = ; Any character except controls, SP, and specials.

   For RFC-822 compatibility LWSP (and comments) are allowed around each '.';
   they are skipped and not copied to str.

   The parser must already be positioned on an atext byte, otherwise -1 is
   returned. Every '.' must be followed by at least one atext byte: an empty
   element ("a..b") or a trailing dot ("a." followed by end of input or by
   a non-atext byte) is rejected with -1.

   Returns 1 if the dot-atom was followed by more input (ctx->data is then
   positioned after any trailing LWSP), 0 if the input ended with the
   dot-atom, and -1 on a syntax error or a broken comment. On -1, str may
   already contain the part that was parsed before the error. */
int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;
	int ret;

	if (ctx->data == ctx->end || !IS_ATEXT(*ctx->data))
		return -1;

	/* start always points at the beginning of the current element */
	for (start = ctx->data++; ctx->data != ctx->end; ) {
		if (IS_ATEXT(*ctx->data)) {
			ctx->data++;
			continue;
		}

		/* end of the element - the grammar requires 1*atext, so an
		   element with no bytes (right after a '.') is an error */
		if (start == ctx->data)
			return -1;
		str_append_n(str, start, ctx->data - start);

		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
			return ret;

		if (*ctx->data != '.')
			return 1;

		ctx->data++;
		str_append_c(str, '.');

		/* another element must follow the '.', so running out of
		   input here is an error just like a broken comment is */
		if (rfc822_skip_lwsp(ctx) <= 0)
			return -1;
		start = ctx->data;
	}

	/* The loop is left here only after consuming at least one atext byte
	   of the current element. */
	i_assert(start != ctx->data);
	str_append_n(str, start, ctx->data - start);
	return 0;
}
/* Parse a MIME token and append it to str.

   Bytes are consumed for as long as they satisfy IS_ATEXT_NON_TSPECIAL() or
   are '.'. No minimum length is enforced: if the very first byte doesn't
   qualify, nothing is appended and the return value still comes from
   rfc822_skip_lwsp().

   Returns 0 if the token extends to the end of the input; otherwise the
   result of rfc822_skip_lwsp() for whatever follows the token. */
int rfc822_parse_mime_token(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const token_begin = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT_NON_TSPECIAL(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, token_begin, ctx->data - token_begin);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/* Parse a quoted-string and append its unquoted contents to str.

   On entry ctx->data must point at the opening '"' and at least one byte of
   input must be left; both are asserted. While copying:
    - the enclosing double quotes are dropped,
    - a backslash is dropped and the byte following it is copied literally,
    - an unescaped LF is dropped together with a CR directly before it
      (folding whitespace).

   Returns the result of rfc822_skip_lwsp() for whatever follows the closing
   '"', or -1 if the input ended before the closing '"' (which includes
   ending right after a backslash). On -1, str may already contain part of
   the string. */
int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *start;
	size_t len;

	/* Check the bounds before dereferencing, so that calling this with an
	   exhausted parser is caught instead of reading past the buffer. */
	i_assert(ctx->data < ctx->end);
	i_assert(*ctx->data == '"');
	ctx->data++;

	/* start marks the beginning of the not yet copied text */
	for (start = ctx->data; ctx->data != ctx->end; ctx->data++) {
		switch (*ctx->data) {
		case '"':
			str_append_n(str, start, ctx->data - start);
			ctx->data++;
			return rfc822_skip_lwsp(ctx);
		case '\n':
			/* folding whitespace, remove the (CR)LF */
			len = ctx->data - start;
			if (len > 0 && start[len-1] == '\r')
				len--;
			str_append_n(str, start, len);
			start = ctx->data + 1;
			break;
		case '\\':
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;

			/* flush the text before the backslash; the escaped
			   byte becomes the first byte of the next chunk and
			   the loop increment steps over it uninterpreted */
			str_append_n(str, start, ctx->data - start - 1);
			start = ctx->data;
			break;
		default:
			break;
		}
	}

	/* missing '"' */
	return -1;
}
/* Parse a run of atext bytes and '.' characters and append it to str.

     atom  = [CFWS] 1*atext [CFWS]
     atext = ; Any character except controls, SP, and specials.

   Unlike rfc822_parse_dot_atom(), whitespace around the dots is not silently
   skipped: the run ends at the first byte that is neither atext nor '.'.
   The run may be empty, in which case nothing is appended.

   Returns 0 if the run extends to the end of the input; otherwise the result
   of rfc822_skip_lwsp() for whatever follows the run. */
static int
rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *const run_begin = ctx->data;

	while (ctx->data != ctx->end &&
	       (IS_ATEXT(*ctx->data) || *ctx->data == '.'))
		ctx->data++;

	str_append_n(str, run_begin, ctx->data - run_begin);
	if (ctx->data == ctx->end)
		return 0;
	return rfc822_skip_lwsp(ctx);
}
/* Parse a phrase and append it to str, with a single space between words.

     phrase     = 1*word / obs-phrase
     word       = atom / quoted-string
     obs-phrase = word *(word / "." / CFWS)

   Returns 0 immediately if no input is left and -1 if the input begins with
   '.'. Otherwise words are parsed one after another: a word beginning with
   '"' by rfc822_parse_quoted_string(), anything else by
   rfc822_parse_atom_or_dot(). If a word parser returns <= 0 that value is
   returned as-is. When the byte after a word can't begin another word, the
   result of rfc822_skip_lwsp() is returned. */
int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
{
	int ret;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data == '.')
		return -1;

	for (;;) {
		ret = *ctx->data == '"' ?
			rfc822_parse_quoted_string(ctx, str) :
			rfc822_parse_atom_or_dot(ctx, str);
		if (ret <= 0)
			return ret;

		/* ret > 0, so there is still a byte to look at */
		if (IS_ATEXT(*ctx->data) || *ctx->data == '"' ||
		    *ctx->data == '.') {
			/* another word follows */
			str_append_c(str, ' ');
			continue;
		}
		return rfc822_skip_lwsp(ctx);
	}
}
/* Parse a domain-literal and append it to str.

     domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     dcontent        = dtext / quoted-pair
     dtext           = NO-WS-CTL /     ; Non white space controls
                       %d33-90 /       ; The rest of the US-ASCII
                       %d94-126        ;  characters not including "[",
                                       ;  "]", or "\"

   ctx->data must point at the opening '[' (asserted). The literal is copied
   to str unmodified, including both brackets and any backslashes. A byte
   following a backslash is never treated as the closing bracket.

   Returns the result of rfc822_skip_lwsp() for whatever follows the closing
   ']', or -1 if the input ended before the closing ']'; nothing is appended
   in the latter case. */
static int
rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
{
	const unsigned char *literal_begin;

	i_assert(*ctx->data == '[');
	literal_begin = ctx->data;

	while (ctx->data != ctx->end) {
		switch (*ctx->data) {
		case '\\':
			/* quoted-pair: step onto the escaped byte, which is
			   then skipped by the increment below */
			ctx->data++;
			if (ctx->data == ctx->end)
				return -1;
			break;
		case ']':
			ctx->data++;
			str_append_n(str, literal_begin,
				     ctx->data - literal_begin);
			return rfc822_skip_lwsp(ctx);
		default:
			break;
		}
		ctx->data++;
	}

	/* missing ']' */
	return -1;
}
/* Parse the domain part that follows '@' and append it to str.

     domain          = dot-atom / domain-literal / obs-domain
     domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
     obs-domain      = atom *("." atom)

   On entry ctx->data must point at the '@' and at least one byte of input
   must be left; both are asserted. The '@' itself is not appended to str.
   LWSP and comments after the '@' are skipped.

   Returns -1 if nothing follows the '@' or a comment after it is broken.
   Otherwise returns the result of rfc822_parse_domain_literal() when the
   domain begins with '[', or of rfc822_parse_dot_atom() for anything
   else. */
int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
{
	/* Check the bounds before dereferencing, so that calling this with an
	   exhausted parser is caught instead of reading past the buffer. */
	i_assert(ctx->data < ctx->end);
	i_assert(*ctx->data == '@');
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	if (*ctx->data == '[')
		return rfc822_parse_domain_literal(ctx, str);
	else
		return rfc822_parse_dot_atom(ctx, str);
}
/* Parse a "type/subtype" Content-Type value and append it to str in the
   form "type/subtype".

   Leading LWSP and comments are skipped, as are LWSP and comments directly
   after the '/'. Both the type and the subtype must be at least one byte
   long, as required by the MIME grammar (token := 1*<...>).

   Returns -1 if
    - the input is empty or has a broken comment,
    - the type or the subtype is empty,
    - the type isn't followed by '/', or
    - nothing follows the '/'.
   Otherwise returns the result of rfc822_parse_mime_token() for the subtype:
   0 if the input ended with the subtype, 1 if more input (e.g. parameters)
   follows.

   str is not rolled back on failure: whatever was parsed before the error
   stays appended. */
int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str)
{
	size_t token_pos;
	int ret;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	/* get main type, require at least one byte. the token parser itself
	   reports success also for an empty token, so compare the lengths. */
	token_pos = str_len(str);
	if (rfc822_parse_mime_token(ctx, str) <= 0 ||
	    str_len(str) == token_pos)
		return -1;

	/* skip over "/" */
	if (*ctx->data != '/')
		return -1;
	ctx->data++;
	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;
	str_append_c(str, '/');

	/* get subtype, require at least one byte */
	token_pos = str_len(str);
	ret = rfc822_parse_mime_token(ctx, str);
	if (ret >= 0 && str_len(str) == token_pos)
		return -1;
	return ret;
}
/* Parse the next ";key=value" parameter of a MIME header field.

     .. := *(";" parameter)
     parameter := attribute "=" value
     attribute := token
     value := token / quoted-string

   *key_r and *value_r are always reset to NULL first.

   Returns 0 if there is no more input. Returns -1 with both outputs still
   NULL if the input doesn't continue with ';', nothing usable follows the
   ';', the key can't be parsed as a token with more input after it, or the
   key isn't followed by '='.

   Once the '=' has been seen, *key_r and *value_r are always set. Both point
   into one buffer obtained from t_str_new(), in which the key and the value
   are stored one after another separated by a NUL. The value is
    - empty, if nothing follows the '=' or a comment after it is broken,
    - the unquoted contents, if it starts with '"',
    - copied verbatim up to the next ';', space, TAB, CR or LF, if it starts
      with another '=' (workaround for broken input such as
      name==?utf-8?b?...?=),
    - otherwise a MIME token.
   In this case the return value is -1 if skipping LWSP after the '=' or
   parsing the value failed, and 1 otherwise. */
int rfc822_parse_content_param(struct rfc822_parser_context *ctx,
			       const char **key_r, const char **value_r)
{
	string_t *tmp;
	const char *key;
	size_t value_pos;
	int ret;

	*key_r = NULL;
	*value_r = NULL;

	if (ctx->data == ctx->end)
		return 0;
	if (*ctx->data != ';')
		return -1;
	ctx->data++;

	if (rfc822_skip_lwsp(ctx) <= 0)
		return -1;

	/* key */
	tmp = t_str_new(64);
	if (rfc822_parse_mime_token(ctx, tmp) <= 0)
		return -1;

	/* NUL-terminate the key and remember where the value will begin */
	str_append_c(tmp, '\0');
	value_pos = str_len(tmp);

	if (*ctx->data != '=')
		return -1;
	ctx->data++;

	/* value */
	ret = rfc822_skip_lwsp(ctx);
	if (ret > 0) {
		/* ret > 0 guarantees that there is a byte to look at */
		switch (*ctx->data) {
		case '"':
			ret = rfc822_parse_quoted_string(ctx, tmp);
			break;
		case '=':
			/* workaround for broken input:
			   name==?utf-8?b?...?= */
			for (; ctx->data != ctx->end; ctx->data++) {
				const unsigned char c = *ctx->data;

				if (c == ';' || c == ' ' || c == '\t' ||
				    c == '\r' || c == '\n')
					break;
				str_append_c(tmp, c);
			}
			break;
		default:
			ret = rfc822_parse_mime_token(ctx, tmp);
			break;
		}
	}
	/* else: broken / no value */

	key = str_c(tmp);
	*key_r = key;
	*value_r = key + value_pos;

	if (ret < 0)
		return -1;
	return 1;
}