/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "buffer.h"
#include "unichar.h"
#include "message-parser.h"
#include "fts-parser.h"
/* Parser implementations known to this file. fts_parser_init() tries them in
   array order and uses the first one whose try_init() returns non-NULL;
   fts_parsers_unload() calls unload() on every entry that provides one. */
static const struct fts_parser_vfuncs *parsers[] = {
&fts_parser_html,
&fts_parser_script,
&fts_parser_tika
};
/* Content types for which fts_parser_init() returns FALSE without asking any
   parser. The list is NULL-terminated because it is passed to
   str_array_find() without an explicit length. */
static const char *plaintext_content_types[] = {
"text/plain",
"message/delivery-status",
"message/disposition-notification",
"application/pgp-signature",
NULL
};
/* Find a parser that is willing to handle parser_context.

   parser_context->user and parser_context->content_type must be non-NULL
   (asserted). Content types listed in plaintext_content_types are rejected
   up front; in that case *parser_r is not written at all.

   Otherwise each entry of parsers[] is asked in order via try_init(). The
   result of every attempt is stored in *parser_r, so after a failed search
   over a non-empty table *parser_r is NULL.

   Returns TRUE if a parser was created and stored in *parser_r, FALSE if
   no parser accepted the content. */
bool fts_parser_init(struct fts_parser_context *parser_context,
		     struct fts_parser **parser_r)
{
	unsigned int idx;

	i_assert(parser_context->user != NULL);
	i_assert(parser_context->content_type != NULL);

	if (str_array_find(plaintext_content_types,
			   parser_context->content_type)) {
		/* we probably don't want/need to allow parsers to handle
		   plaintext? */
		return FALSE;
	}

	for (idx = 0; idx < N_ELEMENTS(parsers); idx++) {
		struct fts_parser *candidate =
			parsers[idx]->try_init(parser_context);

		/* stored even when NULL, so a failed search leaves NULL */
		*parser_r = candidate;
		if (candidate != NULL)
			return TRUE;
	}
	return FALSE;
}
/* Allocate a single struct fts_parser with i_new() and return it.

   No callbacks are installed here; fts_parser_more() and
   fts_parser_deinit() both cope with v.more / v.deinit being NULL.
   NOTE(review): this presumably relies on i_new() zero-filling the
   struct - confirm against the allocator. */
struct fts_parser *fts_parser_text_init(void)
{
	struct fts_parser *text_parser;

	text_parser = i_new(struct fts_parser, 1);
	return text_parser;
}
/* Returns TRUE if any of the first size bytes of data is a NUL byte,
   FALSE otherwise (including when size is 0). */
static bool data_has_nuls(const unsigned char *data, size_t size)
{
	const unsigned char *cursor = data;
	size_t remaining = size;

	while (remaining > 0) {
		if (*cursor == '\0')
			return TRUE;
		cursor++;
		remaining--;
	}
	return FALSE;
}
/* Overwrite every NUL byte in buf's contents with a space, in place.
   The size of the buffer is not changed. */
static void replace_nul_bytes(buffer_t *buf)
{
	unsigned char *cursor;
	size_t remaining;

	cursor = buffer_get_modifiable_data(buf, &remaining);
	for (; remaining > 0; remaining--, cursor++) {
		if (*cursor == '\0')
			*cursor = ' ';
	}
}
/* Let the parser produce its next piece of output into block (if it has a
   more() callback) and then sanitize that output: on return block->data
   is valid UTF-8 and contains no NUL bytes.

   If the data already satisfies both conditions, block is left as the
   parser produced it. Otherwise a sanitized copy is built in
   parser->utf8_output (created on first use, reused afterwards) and block
   is pointed at that buffer, so the returned data stays usable only until
   the next call for this parser or until fts_parser_deinit(). */
void fts_parser_more(struct fts_parser *parser, struct message_block *block)
{
	if (parser->v.more != NULL)
		parser->v.more(parser, block);

	if (uni_utf8_data_is_valid(block->data, block->size) &&
	    !data_has_nuls(block->data, block->size)) {
		/* already clean, nothing to rewrite */
		return;
	}

	/* output isn't valid UTF-8 or has NULs. fix it in our own buffer. */
	if (parser->utf8_output == NULL) {
		parser->utf8_output =
			buffer_create_dynamic(default_pool, 4096);
	} else {
		buffer_set_used_size(parser->utf8_output, 0);
	}
	if (uni_utf8_get_valid_data(block->data, block->size,
				    parser->utf8_output)) {
		/* The data was valid UTF-8, so we got here only because of
		   NUL bytes. uni_utf8_get_valid_data() writes to the buffer
		   only when it has to repair the input, so in this case the
		   buffer is still empty. Copy the data ourselves, otherwise
		   the whole block would be silently dropped. */
		buffer_append(parser->utf8_output, block->data, block->size);
	}
	replace_nul_bytes(parser->utf8_output);
	block->data = parser->utf8_output->data;
	block->size = parser->utf8_output->used;
}
int fts_parser_deinit(struct fts_parser **_parser, const char **retriable_err_msg_r)
{
struct fts_parser *parser = *_parser;
int ret = 1;
*_parser = NULL;
buffer_free(&parser->utf8_output);
if (parser->v.deinit != NULL) {
const char *error = NULL;
ret = parser->v.deinit(parser, &error);
if (ret == 0) {
i_assert(error != NULL);
if (retriable_err_msg_r != NULL)
*retriable_err_msg_r = error;
}
} else
i_free(parser);
return ret;
}
void fts_parsers_unload(void)
{
unsigned int i;
for (i = 0; i < N_ELEMENTS(parsers); i++) {
if (parsers[i]->unload != NULL)
parsers[i]->unload();
}
}