Cross Reference: rfc822-parser.c
xref
: /
dovecot
/
src
/
lib-mail
/
rfc822-parser.c
Home
History
Annotate
Line#
Navigate
Download
Search
only in
./
rfc822-parser.c revision f4b1d7e52e983ba3063584c8b0ef577d6946331c
163
N/A
/* Copyright (c) 2005-2008 Dovecot authors, see the included COPYING file */
163
N/A
163
N/A
#
include
"
lib.h
"
163
N/A
#
include
"
str.h
"
163
N/A
#
include
"
strescape.h
"
163
N/A
#
include
"
rfc822-parser.h
"
163
N/A
163
N/A
/*
   atext        =       ALPHA / DIGIT / ; Any character except controls,
                        "!" / "#" /     ;  SP, and specials.
                        "$" / "%" /     ;  Used for atoms
                        "&" / "'" /
                        "*" / "+" /
                        "-" / "/" /
                        "=" / "?" /
                        "^" / "_" /
                        "`" / "{" /
                        "|" / "}" /
                        "~"

   MIME:

   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
               or tspecials>
   tspecials :=  "(" / ")" / "<" / ">" / "@" /
                 "," / ";" / ":" / "\" / <">
                 "/" / "[" / "]" / "?" / "="

   So a token is the same as a dot-atom, except that it also stops at
   '/', '?' and '='.
*/
163
N/A
211
N/A
/* atext chars are marked with 1, alpha and digits with 2,
4601
N/A
atext-but-mime-tspecials with 4 */
636
N/A
unsigned
char
rfc822_atext_chars
[
256
] = {
211
N/A
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0-15 */
1703
N/A
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 16-31 */
844
N/A
0,
1
, 0,
1
,
1
,
1
,
1
,
1
, 0, 0,
1
,
1
, 0,
1
, 0,
4
,
/* 32-47 */
4601
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
, 0, 0, 0,
4
, 0,
4
,
/* 48-63 */
4601
N/A
0,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
/* 64-79 */
1273
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
, 0, 0, 0,
1
,
1
,
/* 80-95 */
163
N/A
1
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
/* 96-111 */
4601
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
1
,
1
,
1
,
1
, 0,
/* 112-127 */
3661
N/A
3996
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
3996
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
3996
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
163
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
4601
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
4601
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
4601
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
163
N/A
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
163
N/A
};
163
N/A
163
N/A
void
rfc822_parser_init
(
struct
rfc822_parser_context
*
ctx
,
163
N/A
const
unsigned
char
*
data
,
size_t
size
,
163
N/A
string_t
*
last_comment
)
163
N/A
{
163
N/A
memset
(
ctx
, 0,
sizeof
(*
ctx
));
163
N/A
ctx
->
data
=
data
;
163
N/A
ctx
->
end
=
data
+
size
;
163
N/A
ctx
->
last_comment
=
last_comment
;
163
N/A
}
4601
N/A
163
N/A
int
rfc822_skip_comment
(
struct
rfc822_parser_context
*
ctx
)
4934
N/A
{
163
N/A
const
unsigned
char
*
start
;
163
N/A
int
level
=
1
;
163
N/A
163
N/A
i_assert
(*
ctx
->
data
==
'('
);
4934
N/A
4601
N/A
if
(
ctx
->
last_comment
!=
NULL
)
163
N/A
str_truncate
(
ctx
->
last_comment
, 0);
163
N/A
163
N/A
start
= ++
ctx
->
data
;
163
N/A
for
(;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
163
N/A
switch
(*
ctx
->
data
) {
163
N/A
case
'('
:
163
N/A
level
++;
4601
N/A
break
;
4601
N/A
case
')'
:
163
N/A
if
(--
level
== 0) {
163
N/A
if
(
ctx
->
last_comment
!=
NULL
) {
163
N/A
str_append_n
(
ctx
->
last_comment
,
start
,
163
N/A
ctx
->
data
-
start
);
211
N/A
}
163
N/A
ctx
->
data
++;
163
N/A
return
ctx
->
data
!=
ctx
->
end
;
163
N/A
}
163
N/A
break
;
163
N/A
case
'\\'
:
163
N/A
if
(
ctx
->
last_comment
!=
NULL
) {
163
N/A
str_append_n
(
ctx
->
last_comment
,
start
,
4601
N/A
ctx
->
data
-
start
);
4601
N/A
}
4601
N/A
start
=
ctx
->
data
+
1
;
4601
N/A
4601
N/A
ctx
->
data
++;
4601
N/A
if
(
ctx
->
data
==
ctx
->
end
)
4601
N/A
return
-
1
;
4601
N/A
break
;
4601
N/A
}
4601
N/A
}
163
N/A
163
N/A
/* missing ')' */
163
N/A
return
-
1
;
163
N/A
}
163
N/A
1703
N/A
int
rfc822_skip_lwsp
(
struct
rfc822_parser_context
*
ctx
)
1703
N/A
{
1703
N/A
for
(;
ctx
->
data
!=
ctx
->
end
;) {
1703
N/A
if
(*
ctx
->
data
==
' '
|| *
ctx
->
data
==
'\t'
||
1703
N/A
*
ctx
->
data
==
'\r'
|| *
ctx
->
data
==
'\n'
) {
1703
N/A
ctx
->
data
++;
1703
N/A
continue
;
1703
N/A
}
1703
N/A
1703
N/A
if
(*
ctx
->
data
!=
'('
)
1703
N/A
break
;
1703
N/A
1703
N/A
if
(
rfc822_skip_comment
(
ctx
) < 0)
1703
N/A
return
-
1
;
4601
N/A
}
4601
N/A
return
ctx
->
data
!=
ctx
->
end
;
4601
N/A
}
4601
N/A
1703
N/A
int
rfc822_parse_atom
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
1703
N/A
{
1703
N/A
const
unsigned
char
*
start
;
1703
N/A
1703
N/A
/*
1703
N/A
atom = [CFWS] 1*atext [CFWS]
1703
N/A
atext =
1703
N/A
; Any character except controls, SP, and specials.
1703
N/A
*/
1703
N/A
if
(
ctx
->
data
==
ctx
->
end
|| !
IS_ATEXT
(*
ctx
->
data
))
1703
N/A
return
-
1
;
1703
N/A
1703
N/A
for
(
start
=
ctx
->
data
++;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
1703
N/A
if
(
IS_ATEXT
(*
ctx
->
data
))
181
N/A
continue
;
163
N/A
163
N/A
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
3996
N/A
return
rfc822_skip_lwsp
(
ctx
);
3996
N/A
}
3996
N/A
3996
N/A
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
3996
N/A
return
0;
3996
N/A
}
4601
N/A
4601
N/A
int
rfc822_parse_dot_atom
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
3996
N/A
{
4601
N/A
const
unsigned
char
*
start
;
3996
N/A
int
ret
;
3996
N/A
3996
N/A
/*
4601
N/A
dot-atom = [CFWS] dot-atom-text [CFWS]
dot-atom-text = 1*atext *("." 1*atext)
atext =
; Any character except controls, SP, and specials.
For RFC-822 compatibility allow LWSP around '.'
*/
if
(
ctx
->
data
==
ctx
->
end
|| !
IS_ATEXT
(*
ctx
->
data
))
return
-
1
;
for
(
start
=
ctx
->
data
++;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
if
(
IS_ATEXT
(*
ctx
->
data
))
continue
;
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
if
((
ret
=
rfc822_skip_lwsp
(
ctx
)) <= 0)
return
ret
;
if
(*
ctx
->
data
!=
'.'
)
return
1
;
ctx
->
data
++;
str_append_c
(
str
,
'.'
);
if
((
ret
=
rfc822_skip_lwsp
(
ctx
)) <= 0)
return
ret
;
start
=
ctx
->
data
;
}
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
return
0;
}
int
rfc822_parse_mime_token
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
const
unsigned
char
*
start
;
for
(
start
=
ctx
->
data
;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
if
(
IS_ATEXT_NON_TSPECIAL
(*
ctx
->
data
) || *
ctx
->
data
==
'.'
)
continue
;
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
return
rfc822_skip_lwsp
(
ctx
);
}
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
return
0;
}
int
rfc822_parse_quoted_string
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
const
unsigned
char
*
start
;
i_assert
(*
ctx
->
data
==
'"'
);
ctx
->
data
++;
for
(
start
=
ctx
->
data
;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
if
(*
ctx
->
data
==
'"'
) {
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
ctx
->
data
++;
return
rfc822_skip_lwsp
(
ctx
);
}
if
(*
ctx
->
data
!=
'\\'
)
continue
;
ctx
->
data
++;
if
(
ctx
->
data
==
ctx
->
end
)
return
-
1
;
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
start
=
ctx
->
data
;
}
/* missing '"' */
return
-
1
;
}
static
int
rfc822_parse_atom_or_dot
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
const
unsigned
char
*
start
;
/*
atom = [CFWS] 1*atext [CFWS]
atext =
; Any character except controls, SP, and specials.
The difference between this function and rfc822_parse_dot_atom()
is that this doesn't just silently skip over all the whitespace.
*/
for
(
start
=
ctx
->
data
;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
if
(
IS_ATEXT
(*
ctx
->
data
) || *
ctx
->
data
==
'.'
)
continue
;
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
return
rfc822_skip_lwsp
(
ctx
);
}
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
return
0;
}
int
rfc822_parse_phrase
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
int
ret
;
/*
phrase = 1*word / obs-phrase
word = atom / quoted-string
obs-phrase = word *(word / "." / CFWS)
*/
if
(
ctx
->
data
==
ctx
->
end
)
return
0;
if
(*
ctx
->
data
==
'.'
)
return
-
1
;
for
(;;) {
if
(*
ctx
->
data
==
'"'
)
ret
=
rfc822_parse_quoted_string
(
ctx
,
str
);
else
ret
=
rfc822_parse_atom_or_dot
(
ctx
,
str
);
if
(
ret
<= 0)
return
ret
;
if
(!
IS_ATEXT
(*
ctx
->
data
) && *
ctx
->
data
!=
'"'
&& *
ctx
->
data
!=
'.'
)
break
;
str_append_c
(
str
,
' '
);
}
return
rfc822_skip_lwsp
(
ctx
);
}
static
int
rfc822_parse_domain_literal
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
const
unsigned
char
*
start
;
/*
domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
dcontent = dtext / quoted-pair
dtext = NO-WS-CTL / ; Non white space controls
%d33-90 / ; The rest of the US-ASCII
%d94-126 ; characters not including "[",
; "]", or "\"
*/
i_assert
(*
ctx
->
data
==
'['
);
for
(
start
=
ctx
->
data
;
ctx
->
data
!=
ctx
->
end
;
ctx
->
data
++) {
if
(*
ctx
->
data
==
'\\'
) {
ctx
->
data
++;
if
(
ctx
->
data
==
ctx
->
end
)
break
;
}
else
if
(*
ctx
->
data
==
']'
) {
ctx
->
data
++;
str_append_n
(
str
,
start
,
ctx
->
data
-
start
);
return
rfc822_skip_lwsp
(
ctx
);
}
}
/* missing ']' */
return
-
1
;
}
int
rfc822_parse_domain
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
/*
domain = dot-atom / domain-literal / obs-domain
domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
obs-domain = atom *("." atom)
*/
i_assert
(*
ctx
->
data
==
'@'
);
ctx
->
data
++;
if
(
rfc822_skip_lwsp
(
ctx
) <= 0)
return
-
1
;
if
(*
ctx
->
data
==
'['
)
return
rfc822_parse_domain_literal
(
ctx
,
str
);
else
return
rfc822_parse_dot_atom
(
ctx
,
str
);
}
int
rfc822_parse_content_type
(
struct
rfc822_parser_context
*
ctx
,
string_t
*
str
)
{
if
(
rfc822_skip_lwsp
(
ctx
) <= 0)
return
-
1
;
/* get main type */
if
(
rfc822_parse_mime_token
(
ctx
,
str
) <= 0)
return
-
1
;
/* skip over "/" */
if
(*
ctx
->
data
!=
'/'
)
return
-
1
;
ctx
->
data
++;
if
(
rfc822_skip_lwsp
(
ctx
) <= 0)
return
-
1
;
str_append_c
(
str
,
'/'
);
/* get subtype */
return
rfc822_parse_mime_token
(
ctx
,
str
);
}
int
rfc822_parse_content_param
(
struct
rfc822_parser_context
*
ctx
,
const
char
**
key_r
,
const
char
**
value_r
)
{
string_t
*
tmp
;
size_t
value_pos
;
int
ret
;
/* .. := *(";" parameter)
parameter := attribute "=" value
attribute := token
value := token / quoted-string
*/
*
key_r
=
NULL
;
*
value_r
=
NULL
;
if
(
ctx
->
data
==
ctx
->
end
)
return
0;
if
(*
ctx
->
data
!=
';'
)
return
-
1
;
ctx
->
data
++;
if
(
rfc822_skip_lwsp
(
ctx
) <= 0)
return
-
1
;
tmp
=
t_str_new
(
64
);
if
(
rfc822_parse_mime_token
(
ctx
,
tmp
) <= 0)
return
-
1
;
str_append_c
(
tmp
,
'\0'
);
value_pos
=
str_len
(
tmp
);
if
(*
ctx
->
data
!=
'='
)
return
-
1
;
ctx
->
data
++;
if
((
ret
=
rfc822_skip_lwsp
(
ctx
)) <= 0) {
/* broken / no value */
}
else
if
(*
ctx
->
data
==
'"'
) {
ret
=
rfc822_parse_quoted_string
(
ctx
,
tmp
);
str_unescape
(
str_c_modifiable
(
tmp
) +
value_pos
);
}
else
{
ret
=
rfc822_parse_mime_token
(
ctx
,
tmp
);
}
*
key_r
=
str_c
(
tmp
);
*
value_r
= *
key_r
+
value_pos
;
return
ret
< 0 ? -
1
:
1
;
}