message-tokenize.c revision 7c424aa51c956c628e3512055841aa2f9eef4833
/* Copyright (C) 2002 Timo Sirainen */
#include "lib.h"
#include "str.h"
#include "strescape.h"
#include "message-tokenize.h"
/* State carried by one tokenizer instance. */
struct message_tokenizer {
/* NOTE(review): presumably the input bytes being tokenized - confirm
   against the code that fills this struct in */
const unsigned char *data;
/* opaque pointer; NOTE(review): presumably the value passed to the
   constructor and handed back when reporting errors - confirm */
void *error_context;
/* NOTE(review): presumably the most recently produced token - confirm */
int token;
/* single-bit flags; NOTE(review): the code that sets skip_comments and
   dot_token is not visible here, their names suggest caller-selected
   options - confirm */
unsigned int skip_comments:1;
unsigned int dot_token:1;
/* consulted when an angle bracket is scanned: an opening bracket while
   this is set, or a closing bracket while it is clear, is reported as
   a parse error */
unsigned int in_bracket:1;
};
/* Report a parse error: the visible expansion returns TOKEN_LAST from the
   function that invokes the macro.
   NOTE(review): as shown, the expansion closes one more brace than it
   opens, and one call site is commented as continuing after the macro -
   a guarding condition appears to be missing from this view; confirm
   against the upstream file before editing. */
#define PARSE_ERROR() \
STMT_START { \
return TOKEN_LAST; \
} \
} STMT_END
/* Report that an expected closing character was never found. Callers pass
   the missing character as c (closing parenthesis, closing square bracket
   or double quote); c is not referenced by the visible expansion, which
   returns TOKEN_LAST from the invoking function.
   NOTE(review): as shown, the expansion closes one more brace than it
   opens - a guarding condition appears to be missing from this view;
   confirm against the upstream file before editing. */
#define PARSE_ERROR_MISSING(c) \
STMT_START { \
return TOKEN_LAST; \
} \
} STMT_END
struct message_tokenizer *
void *error_context)
{
/* NOTE(review): the function name, the remaining parameters and the
   statements that allocate and fill in tok are not visible in this
   view; as shown, tok is returned without ever being assigned -
   confirm against the upstream file. */
struct message_tokenizer *tok;
return tok;
}
{
}
{
}
{
}
{
/* Scanner body: classifies the byte at data[i]. Whitespace is skipped;
   comments, domain literals and quoted strings are scanned as spans with
   backslash handling; angle brackets are checked against in_bracket;
   anything not matched by a case label is classified as an atom.
   NOTE(review): the signature, the loop headers and several statements
   are not visible in this view, so the braces below do not balance as
   shown - confirm against the upstream file before relying on these
   notes. */
const unsigned char *data;
return TOKEN_LAST;
/* -1 means no token has been recognised for the current byte */
token = -1;
switch (data[i]) {
case ' ':
case '\t':
case '\r':
case '\n':
/* skip whitespace */
break;
case '(':
/* (comment) - nesting is allowed */
/* an atom is pending: leave token unset so that the end-of-atom
   check after the switch fires; the same guard recurs in the cases
   below */
if (last_atom)
break;
token = '(';
/* depth of currently open parentheses */
level = 1;
if (data[i] == '\\' &&
i++;
else if (data[i] == '(')
level++;
else if (data[i] == ')') {
if (--level == 0)
break;
}
}
/* the comment was never closed */
if (level > 0)
PARSE_ERROR_MISSING(')');
break;
case '[':
/* domain literal - nesting isn't allowed */
if (last_atom)
break;
token = '[';
if (data[i] == '\\' &&
i++;
else if (data[i] == '[') {
/* nesting not allowed, but
continue anyway */
PARSE_ERROR();
}
i++;
}
PARSE_ERROR_MISSING(']');
break;
case '"':
/* quoted string */
if (last_atom)
break;
token = '"';
if (data[i] == '\\' &&
i++;
i++;
}
PARSE_ERROR_MISSING('"');
break;
case '<':
if (last_atom)
break;
if (tok->in_bracket) {
/* '<' cannot be nested */
PARSE_ERROR();
}
token = '<';
break;
case '>':
if (last_atom)
break;
if (!tok->in_bracket) {
/* missing '<' */
PARSE_ERROR();
}
token = '>';
break;
/* a closing parenthesis or bracket reaching this switch, or a bare
   backslash, is reported as a parse error */
case ')':
case ']':
case '\\':
PARSE_ERROR();
/* fall through */
/* RFC822 specials: */
case '@':
case ',':
case ';':
case ':':
case '.':
/* RFC 2045 specials: */
case '/':
case '?':
case '=':
break;
/* NOTE(review): as shown, the fall-through marker below follows an
   unconditional break and so cannot be reached - a condition appears
   to be missing from this view */
/* fall through */
default:
/* atom */
token = 'A';
if (!last_atom) {
}
break;
}
if (last_atom) {
if (token != 'A') {
/* end of atom */
break;
}
} else {
/* a non-atom token was recognised: step past its last byte and
   stop scanning */
if (token != -1) {
i++;
break;
}
}
/* unexpected eol */
break;
}
}
if (last_atom) {
/* end of atom */
}
}
}
{
}
{
}
const unsigned char *
{
}
const enum message_token *stop_tokens)
{
/* stop_tokens is an array of tokens terminated by TOKEN_LAST.
   NOTE(review): the function name, the other parameters, the enclosing
   loop and most statements are not visible in this view; presumably
   this collects the text of successive tokens until a stop token is
   seen - confirm against the upstream file. */
enum message_token token;
const unsigned char *value;
/* return as soon as the current token is listed in stop_tokens */
for (i = 0; stop_tokens[i] != TOKEN_LAST; i++)
if (token == stop_tokens[i])
return;
if (token == TOKEN_COMMENT) {
/* handle comment specially */
}
continue;
}
if (!token_str)
else if (token == TOKEN_QSTRING) {
/* unescape only quoted strings, since we're removing
the quotes. for domain literals I don't see much
point in unescaping if [] is still kept.. */
if (last_str)
} else {
if (last_str)
if (token == TOKEN_DLITERAL)
if (token == TOKEN_DLITERAL)
}
}
}