/* fts-tokenizer-address.c revision 507ea0bc5b25efb4c96033a19dec66689a50ebd0 */
/* Copyright (c) 2015-2016 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "str.h"
#include "buffer.h"
#include "rfc822-parser.h"
#include "fts-tokenizer-private.h"
#include "fts-tokenizer-common.h"
/* True when the rfc822_atext_chars table entry for c equals 2. The argument
   is converted through unsigned char first, so a negative char value cannot
   produce a negative table index.
   NOTE(review): the value 2 presumably marks characters that are acceptable
   in a domain part - confirm against rfc822-parser.h. */
#define IS_DTEXT(c) \
(rfc822_atext_chars[(int)(unsigned char)(c)] == 2)
/* Initial value of max_length in fts_tokenizer_email_address_create(). */
#define FTS_DEFAULT_ADDRESS_MAX_LENGTH 254
/* States of the email address parser. EMAIL_ADDRESS_PARSER_STATE_NONE is the
   value the parse helpers in this file return when the data is not a
   localpart or not a domain. */
enum email_address_parser_state {
};
/* Email address tokenizer instance. */
struct email_address_fts_tokenizer {
/* Embedded generic tokenizer. It is the first member, and functions in this
   file cast their _tok handle to struct email_address_fts_tokenizer. */
struct fts_tokenizer tokenizer;
/* NOTE(review): presumably the maximum length of an address token;
   fts_tokenizer_email_address_create() starts from
   FTS_DEFAULT_ADDRESS_MAX_LENGTH - confirm. */
unsigned int max_length;
/* NOTE(review): presumably set when the tokenizer is used for search input
   rather than for indexing - confirm against the create() settings. */
bool search;
};
/* Creates the email address tokenizer.

   settings:    NULL-terminated-style string array of tokenizer settings
                (NOTE(review): exact layout is not shown here - confirm).
   tokenizer_r: output for the created tokenizer.
   error_r:     NOTE(review): presumably receives the reason when -1 is
                returned - confirm.

   max_length starts at FTS_DEFAULT_ADDRESS_MAX_LENGTH. Returns 0 at the end
   of the function and -1 on the rejecting branches; one of those branches
   tests max_length == 0. */
static int
fts_tokenizer_email_address_create(const char *const *settings,
struct fts_tokenizer **tokenizer_r,
const char **error_r)
{
struct email_address_fts_tokenizer *tok;
unsigned int max_length = FTS_DEFAULT_ADDRESS_MAX_LENGTH;
unsigned int i;
max_length == 0) {
return -1;
}
} else {
return -1;
}
}
return 0;
}
{
/* View the handle passed in as _tok as the address tokenizer struct. */
struct email_address_fts_tokenizer *tok =
(struct email_address_fts_tokenizer *)_tok;
}
/* NOTE(review): presumably hands the pending address token to the caller
   through token_r - confirm. */
static void
const char **token_r)
{
/* Future-proofing: delete a partial UTF-8 sequence.
   IS_DTEXT() does not actually allow UTF-8 addresses yet, though. */
}
if (len > 0)
}
/* Returns TRUE or FALSE.
   NOTE(review): TRUE presumably means token_r was filled with data meant for
   the parent tokenizer - confirm. */
static bool
const char **token_r)
{
return FALSE;
/* We're searching and we want to find only the full user@domain
   (not "user" and "domain"). We do this by not feeding the last
   user@domain to the parent tokenizer. */
unsigned int parent_prefix_len =
return FALSE;
}
return TRUE;
}
/* Used to rewind past characters that cannot be the start of a new
   localpart. Returns the size that can be skipped. */
{
/* Yes, a dot can start an address. De facto before de jure. */
skip++;
return skip;
}
/* Parses a local-part of an address and returns the resulting parser state.
   When the data is not a localpart, the result is
   EMAIL_ADDRESS_PARSER_STATE_NONE. */
static enum email_address_parser_state
{
pos++;
/* Leave the scan as soon as seen_at is set. */
if (seen_at)
break;
}
/* localpart and @ */
}
/* localpart, @ not included yet */
}
/* Not a localpart. Skip past the rest of the no-good chars. */
return EMAIL_ADDRESS_PARSER_STATE_NONE;
}
{
return TRUE;
/* TRUE when the character right after *p is the terminating NUL.
   NOTE(review): p presumably points at the '@' of the address - confirm. */
return p[1] == '\0';
}
/* Parses a domain part of an address and returns the resulting parser state.
   When the data is not a domain, the result is
   EMAIL_ADDRESS_PARSER_STATE_NONE. */
static enum email_address_parser_state
{
pos++;
/* A complete domain name */
}
}
/* Not a domain. Skip past the no-good chars. */
return EMAIL_ADDRESS_PARSER_STATE_NONE;
}
/* Buffer raw data for the parent tokenizer. */
static void
{
}
{
/* View the handle passed in as _tok as the address tokenizer struct. */
struct email_address_fts_tokenizer *tok =
(struct email_address_fts_tokenizer *)_tok;
}
/* Tokenizer step: consumes input data and works through the address parser
   states. A size of 0 means end of data. error_r is not used.
   Returns 1 or 0.
   NOTE(review): presumably 1 means a token is being returned and 0 means
   more input is needed - confirm against the fts_tokenizer API. */
static int
const char **error_r ATTR_UNUSED)
{
struct email_address_fts_tokenizer *tok =
(struct email_address_fts_tokenizer *)_tok;
return 1;
}
/* End of data: output lingering tokens. First the parent's data, then
   possibly our token, if it is complete enough. */
if (size == 0) {
domain_is_empty(tok)) {
/* user@ without domain - reset state */
}
return 1;
return 1;
}
}
/* 1) regular input data OR
   2) circle around to return a completed address */
/* No part of an address found yet. Remove possible earlier data. */
/* fall through */
/* last_word is empty or has the beginnings of a valid
   local-part, but no '@' found yet. Continue parsing
   the beginning of data to see if it contains a full
   local-part@ */
&local_skip);
/* Advance by the amount the parse helper reported through local_skip. */
pos += local_skip;
break;
/* last_word has a local-part@ and maybe the beginning
   of a domain. Continue parsing the beginning of data
   to see if it contains a valid domain. */
&local_skip);
/* Advance by the amount the parse helper reported through local_skip. */
pos += local_skip;
break;
return 1;
default:
/* Every parser state is handled above. */
i_unreached();
}
}
return 0;
}
/* Function table of the email address tokenizer. */
static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
};
/* Tokenizer definition; its name is "email-address". */
static const struct fts_tokenizer fts_tokenizer_email_address_real = {
.name = "email-address",
};
const struct fts_tokenizer *fts_tokenizer_email_address =